Diffstat (limited to 'inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels')
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp  78
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h  72
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp  51
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h  34
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp  71
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h  60
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp  51
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h  34
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp  17
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp  22
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp  16
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp  100
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h  39
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp  97
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h  43
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp  6
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp  3
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp  75
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h  33
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp  73
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h  33
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp  5
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp  9
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp  110
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h  41
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp  222
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h  35
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp  31
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h  3
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp  19
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp  6
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp  117
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h  38
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp  98
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h  69
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp  41
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp  31
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h  35
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp  86
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h  61
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp  58
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h  34
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp  6
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp  3
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp  6
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp  8
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp  83
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h  36
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp  12
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp  8
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp  10
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp  177
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h  62
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp  64
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h  35
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp  31
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h  35
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp  153
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h  58
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp  31
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h  37
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp  7
108 files changed, 3167 insertions, 144 deletions
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
index 7feeb60d6..358b66d1c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
@@ -96,7 +96,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
if (newParams.gradient)
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp
index a69ce07c6..894101bff 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp
@@ -75,7 +75,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = FORCE_PRIORITY_9;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp
index 455b249cc..a51824c52 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp
@@ -78,7 +78,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp
index 70af969b3..8e086dae6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp
@@ -78,7 +78,7 @@ namespace kernel_selector
runInfo.lws1 = 1;
runInfo.lws2 = 1;
- FillCLKernelData(kernel, runInfo, kernelName, jit, entryPoint);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
size = (size / 128 + 1) * topK;
}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp
index 46e8e85d8..c94e624bd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp
@@ -95,7 +95,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
index 811d4412f..ebf881f50 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
@@ -80,7 +80,7 @@ namespace kernel_selector
auto& kernel = kd.kernels[0];
int inputs_num = 1 + orgParams.batchNormParams.with_inv_var;
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, inputs_num);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, inputs_num);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp
index c5b15712a..25d9115dd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp
@@ -70,7 +70,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 3);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp
new file mode 100644
index 000000000..3346c4b0a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp
@@ -0,0 +1,78 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "border_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+ JitConstants BorderKernelBase::GetJitConstants(const border_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ jit.AddConstants({
+ MakeJitConstant("LT_SIZES", params.lt_sizes),
+ MakeJitConstant("RB_SIZES", params.rb_sizes),
+ MakeJitConstant(toString(params.b_type), "")
+ });
+
+ return jit;
+ }
+
+ BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params& params) const
+ {
+ const auto& output = params.output;
+
+ DispatchData kd;
+
+ kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+ std::vector<size_t> global{output.X().v, output.Y().v, output.Batch().v * output.Feature().v};
+ const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
+
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+ {
+ assert(params.GetType() == KernelType::BORDER);
+
+ const auto& prim_params = static_cast<const border_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+
+ auto run_info = SetDefault(prim_params);
+ KernelData k_data = KernelData::Default<border_params>(params);
+
+ auto cldnn_jit = GetJitConstants(prim_params);
+ auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+ auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = k_data.kernels[0];
+ FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+
+ k_data.estimatedTime = estimated_time;
+
+ return {k_data};
+ }
+}
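
Note: SetDefault above derives the ND-range from the output tensor, with a global work size of {X, Y, Batch * Feature} and local sizes taken from GetOptimalLocalWorkGroupSizes. The following is a minimal standalone sketch of that shape of logic, assuming a simple largest-divisor heuristic with a 256-item work-group cap; the real clDNN helper may choose differently.

#include <cstddef>
#include <vector>

// Sketch only: per dimension, pick the largest divisor of the global size
// that still fits the remaining work-group budget (assumed cap of 256).
static size_t largest_divisor_up_to(size_t n, size_t cap)
{
    for (size_t d = cap; d > 1; --d)
        if (n % d == 0)
            return d;
    return 1;
}

std::vector<size_t> sketch_optimal_lws(const std::vector<size_t>& global)
{
    std::vector<size_t> local;
    size_t budget = 256;                  // typical device max work-group size
    for (size_t g : global)
    {
        const size_t l = largest_divisor_up_to(g, budget);
        local.push_back(l);
        budget /= l;                      // keep lws0 * lws1 * lws2 <= 256
    }
    return local;
}

// e.g. a 224x224 output with batch 1 and 64 features gives
// global = {224, 224, 64}; this sketch then yields local = {224, 1, 1}.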
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h
new file mode 100644
index 000000000..43c10c715
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h
@@ -0,0 +1,72 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // border_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct border_params : public base_params
+ {
+ DimTensor<> lt_sizes;
+ DimTensor<> rb_sizes;
+ BorderType b_type = BorderType::ZERO;
+
+
+ border_params()
+ : base_params(KernelType::BORDER)
+ {
+ }
+
+ ParamsKey GetParamsKey() const override
+ {
+ ParamsKey k = base_params::GetParamsKey();
+ // k.EnableBorderType(b_type);
+ return k;
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // border_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct border_optional_params : optional_params
+ {
+ border_optional_params()
+ : optional_params(KernelType::BORDER)
+ {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // BorderKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class BorderKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+
+ using DispatchData = CommonDispatchData;
+
+ protected:
+ JitConstants GetJitConstants(const border_params& params) const;
+ DispatchData SetDefault(const border_params& params) const;
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
new file mode 100644
index 000000000..9029d7afc
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
@@ -0,0 +1,51 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "border_kernel_ref.h"
+
+
+namespace kernel_selector
+{
+ ParamsKey BorderKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::yxfb);
+ k.EnableInputLayout(DataLayout::byxf);
+
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::byxf);
+
+ k.EnableBatching();
+
+ return k;
+ }
+
+ KernelsData BorderKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h
new file mode 100644
index 000000000..0862ed144
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "border_kernel_base.h"
+
+
+namespace kernel_selector
+{
+ class BorderKernelRef : public BorderKernelBase
+ {
+ public:
+ BorderKernelRef() : BorderKernelBase("border_gpu_ref") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp
new file mode 100644
index 000000000..42e352c6a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "border_kernel_selector.h"
+#include "border_kernel_ref.h"
+
+namespace kernel_selector
+{
+ border_kernel_selector::border_kernel_selector()
+ {
+ Attach<BorderKernelRef>();
+ }
+
+ KernelsData border_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::BORDER);
+ }
+}
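
Note: a hypothetical call site for the new selector (the function and variable names below are illustrative; border_params and border_optional_params come from border_kernel_base.h above).

#include "border_kernel_selector.h"

kernel_selector::KernelsData pick_border_kernels(
    const kernel_selector::border_params& params,
    const kernel_selector::border_optional_params& options)
{
    // Instance() is the singleton declared in border_kernel_selector.h;
    // GetBestKernels routes through GetNaiveBestKernel(KernelType::BORDER),
    // which tries the one attached BorderKernelRef.
    return kernel_selector::border_kernel_selector::Instance()
        .GetBestKernels(params, options);
}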
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h
new file mode 100644
index 000000000..515a9a497
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+
+namespace kernel_selector
+{
+ class border_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static border_kernel_selector &Instance() {
+ static border_kernel_selector instance;
+ return instance;
+ }
+
+ border_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp
new file mode 100644
index 000000000..3d3b2f4d3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp
@@ -0,0 +1,71 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "broadcast_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+ JitConstants BroadcastKernelBase::GetJitConstants(const broadcast_params& params)
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+ return jit;
+ }
+
+ BroadcastKernelBase::DispatchData BroadcastKernelBase::SetDefault(const broadcast_params& params)
+ {
+ const auto& output = params.output;
+
+ DispatchData kd;
+
+ kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+ std::vector<size_t> global{output.X().v, output.Y().v, output.Batch().v * output.Feature().v};
+ const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
+
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+ {
+ assert(params.GetType() == KernelType::BROADCAST);
+
+ const auto& prim_params = static_cast<const broadcast_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+
+ auto run_info = SetDefault(prim_params);
+ KernelData k_data = KernelData::Default<broadcast_params>(params);
+
+ auto cldnn_jit = GetJitConstants(prim_params);
+ auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+ auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = k_data.kernels[0];
+ FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+
+ k_data.estimatedTime = estimated_time;
+
+ return {k_data};
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h
new file mode 100644
index 000000000..cf4865e80
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h
@@ -0,0 +1,60 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // broadcast_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct broadcast_params : public base_params
+ {
+ broadcast_params()
+ : base_params(KernelType::BROADCAST)
+ {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // broadcast_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct broadcast_optional_params : optional_params
+ {
+ broadcast_optional_params()
+ : optional_params(KernelType::BROADCAST)
+ {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // BroadcastKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class BroadcastKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+
+ using DispatchData = CommonDispatchData;
+
+ protected:
+ static JitConstants GetJitConstants(const broadcast_params& params);
+ static DispatchData SetDefault(const broadcast_params& params);
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp
new file mode 100644
index 000000000..0be42a5e2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp
@@ -0,0 +1,51 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "broadcast_kernel_ref.h"
+
+
+namespace kernel_selector
+{
+ ParamsKey BroadcastKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::yxfb);
+ k.EnableInputLayout(DataLayout::byxf);
+
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::byxf);
+
+ k.EnableBatching();
+
+ return k;
+ }
+
+ KernelsData BroadcastKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h
new file mode 100644
index 000000000..ccca397ab
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "broadcast_kernel_base.h"
+
+
+namespace kernel_selector
+{
+ class BroadcastKernelRef : public BroadcastKernelBase
+ {
+ public:
+ BroadcastKernelRef() : BroadcastKernelBase("broadcast_gpu_ref") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp
new file mode 100644
index 000000000..02ae904d2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "broadcast_kernel_selector.h"
+#include "broadcast_kernel_ref.h"
+
+namespace kernel_selector
+{
+ broadcast_kernel_selector::broadcast_kernel_selector()
+ {
+ Attach<BroadcastKernelRef>();
+ }
+
+ KernelsData broadcast_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::BROADCAST);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h
new file mode 100644
index 000000000..ec7f4da55
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+
+namespace kernel_selector
+{
+ class broadcast_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static broadcast_kernel_selector &Instance() {
+ static broadcast_kernel_selector instance;
+ return instance;
+ }
+
+ broadcast_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp
index b81cfe879..f9df941ad 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp
@@ -71,9 +71,18 @@ namespace kernel_selector
// Determine global work sizes.
if (params.inputs[0].GetLayout() != params.output.GetLayout())
{
- kd.gws0 = dims.size() < 2 ? 1 : dims[2].v;
- kd.gws1 = dims.size() < 3 ? 1 : dims[1].v;
- kd.gws2 = dims.size() < 4 ? 1 : dims[0].v;
+ if (params.inputs[0].GetLayout() == kernel_selector::Tensor::DataLayout::yxfb)
+ {
+ kd.gws0 = dims.size() < 2 ? 1 : dims[3].v;
+ kd.gws1 = dims.size() < 3 ? 1 : dims[1].v;
+ kd.gws2 = dims.size() < 4 ? 1 : dims[0].v;
+ }
+ else
+ {
+ kd.gws0 = dims.size() < 2 ? 1 : dims[2].v;
+ kd.gws1 = dims.size() < 3 ? 1 : dims[1].v;
+ kd.gws2 = dims.size() < 4 ? 1 : dims[0].v;
+ }
}
else
{
@@ -124,7 +133,7 @@ namespace kernel_selector
kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
- kernel.kernelString = GetKernelString(kernelName, jit, entryPoint);
+ kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, (uint32_t)i });
kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp
index 5520f427a..c5c6ae53a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp
@@ -26,9 +26,13 @@ namespace kernel_selector
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
@@ -60,10 +64,20 @@ namespace kernel_selector
//case for input == bfyx, output == yxfb and input == yxfb, output == bfyx
if (input_format != output_format)
{
- dim_index[0] = 3;
- dim_index[1] = 2;
- dim_index[2] = 0;
- dim_index[3] = 1;
+ if (input_format == kernel_selector::Tensor::DataLayout::yxfb)
+ {
+ dim_index[0] = 2;
+ dim_index[1] = 3;
+ dim_index[2] = 1;
+ dim_index[3] = 0;
+ }
+ else
+ {
+ dim_index[0] = 3;
+ dim_index[1] = 2;
+ dim_index[2] = 0;
+ dim_index[3] = 1;
+ }
}
cldnnJit.AddConstant(MakeJitConstant("INPUT_DIM_0", dim_index[0]));
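
Note on reading the remap above (assumed semantics, for illustration only): with the dims vector ordered x, y, f, b, each INPUT_DIM_i constant names the logical input dimension that feeds the kernel's dimension i when the input and output layouts differ. The pre-existing table {3, 2, 0, 1} is kept for the bfyx-input case, and the new yxfb-input branch supplies {2, 3, 1, 0} instead, so the same reference kernel covers both directions of the layout mismatch.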
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
index 878a02048..86bfe937c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
@@ -241,7 +241,7 @@ namespace kernel_selector
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, finalKernelName, jit, entryPoint, exeMode, true, !newParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entryPoint, exeMode, true, !newParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
index ad56556bc..b92df30b7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
@@ -1,5 +1,5 @@
/*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ namespace kernel_selector
ConvolutionKernel_bfyx_3x3_dw_opt::ConvolutionKernel_bfyx_3x3_dw_opt() : ConvolutionKernelBase("convolution_gpu_bfyx_3x3_dw_opt")
{
// Generate the dispatch options to the auto-tuner.
- std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14,16 };
+ std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14 };
std::vector<size_t> tileYDimSizes = { 1,2,3,4,5,6,7 };
std::vector<std::string> executionModes = { /*AGE_BASED ,*/ ROUND_ROBIN };
@@ -141,6 +141,18 @@ namespace kernel_selector
KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const
{
+ constexpr int simdSize = 16;
+
+ KernelData kd = KernelData::Default<convolution_params>(params);
+ convolution_params& convParams = *static_cast<convolution_params*>(kd.params.get());
+ DispatchData runInfo = SetDefault(convParams, autoTuneIndex);
+
+ if (static_cast<int>(static_cast<int>(runInfo.gws0 - 1) / simdSize) * runInfo.cldnnStyle.blockWidth + simdSize > convParams.inputs[0].Y().pitch)
+ {
+ // Internal Error - requested tile size is not supported for y pitch
+ return{};
+ }
+
return GetCommonKernelsData(params, options, GetAutoTuneOptions(params, autoTuneIndex).exeMode, autoTuneIndex);
}
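
Note: a worked instance of the new early-out above (all numbers are illustrative, not taken from the patch).

constexpr int simdSize   = 16;
constexpr int gws0       = 64;   // assumed dispatch width for this tune index
constexpr int blockWidth = 14;   // assumed tile width from cldnnStyle
constexpr int needed     = ((gws0 - 1) / simdSize) * blockWidth + simdSize;
// needed == 3 * 14 + 16 == 58: if the input's Y pitch is below 58 elements,
// GetTunedKernelsDataByIndex returns an empty KernelsData and the auto-tuner
// moves on to the next configuration.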
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp
new file mode 100644
index 000000000..f6841db94
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp
@@ -0,0 +1,100 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+ ParamsKey ConvolutionKernel_bfyx_depthwise_weights_lwg::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputWeightsType(WeightsType::F16);
+ k.EnableInputWeightsType(WeightsType::F32);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableBatching();
+ k.EnableSplitSupport();
+ k.EnableSubGroup();
+ k.EnableSubGroupShort();
+ k.EnableDepthwiseSeparableOpt();
+ return k;
+ }
+
+ bool ConvolutionKernel_bfyx_depthwise_weights_lwg::Validate(const Params& p, const optional_params& o) const
+ {
+ if (!ConvolutionKernelBase::Validate(p, o) ||
+ !CovolutionCheckInput(p, o))
+ {
+ return false;
+ }
+
+ const convolution_params& cp = static_cast<const convolution_params&>(p);
+ if (!cp.depthwiseSeparableOpt)
+ return false;
+
+ if ((cp.filterSize.x > 4) ||
+ (cp.filterSize.y > 4) ||
+ (cp.inputs[0].Feature().v != cp.split))
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_depthwise_weights_lwg::SetDefault(const convolution_params& params, int) const
+ {
+ DispatchData runInfo = Parent::SetDefault(params);
+ const auto& out = params.output;
+
+ std::vector<size_t> global = { out.X().v * out.Y().v, out.Feature().v, out.Batch().v };
+
+ runInfo.gws0 = Align(global[0], 16);
+ runInfo.gws1 = global[1];
+ runInfo.gws2 = global[2];
+ runInfo.lws0 = 16;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = 1;
+
+ runInfo.effiency = FORCE_PRIORITY_6;
+
+ return runInfo;
+ }
+
+ JitConstants ConvolutionKernel_bfyx_depthwise_weights_lwg::GetJitConstants(const convolution_params& params, const DispatchData& kd) const
+ {
+ auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd);
+
+ if(params.padding.x != 0 || params.padding.y != 0)
+ mem_consts.AddConstant(MakeJitConstant("BOUNDARY_CHECK", 1));
+
+ return mem_consts;
+ }
+
+ KernelsData ConvolutionKernel_bfyx_depthwise_weights_lwg::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options);
+ }
+}
\ No newline at end of file
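
Note: a worked instance of the dispatch in SetDefault above (the shape is illustrative).

#include <cstddef>

// For an output of X=28, Y=28, F=32, B=1: global[0] = 28 * 28 = 784 and
// Align(784, 16) = 784 (49 full sub-groups), so the kernel launches with
// gws = {784, 32, 1} and a fixed work-group of {16, 1, 1}.
constexpr std::size_t gws0 = ((28 * 28 + 15) / 16) * 16;
static_assert(gws0 == 784, "28*28 is already 16-aligned");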
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h
new file mode 100644
index 000000000..b578f8fd5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h
@@ -0,0 +1,39 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector
+{
+ class ConvolutionKernel_bfyx_depthwise_weights_lwg : public ConvolutionKernelBase
+ {
+ public:
+ using Parent = ConvolutionKernelBase;
+ ConvolutionKernel_bfyx_depthwise_weights_lwg() : ConvolutionKernelBase("convolution_gpu_bfyx_depthwise_weights_lwg") {}
+ virtual ~ConvolutionKernel_bfyx_depthwise_weights_lwg() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+
+ protected:
+ bool Validate(const Params&, const optional_params&) const override;
+ std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::oiyx }; }
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp
new file mode 100644
index 000000000..ce73392ac
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp
@@ -0,0 +1,97 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_batched.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+ ParamsKey ConvolutionKernel_mmad_batched::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableDilation();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableBatching();
+ k.EnableSplitSupport();
+ k.EnableInt8Quantization();
+ k.EnableOutputCalibration();
+ k.DisableTuning();
+ return k;
+ }
+
+ ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_batched::SetDefault(const convolution_params& arg, int) const
+ {
+ DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+ constexpr size_t sub_group_size = 8;
+
+ const auto of_maps = arg.output.Feature().v;
+ const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
+
+ runInfo.effiency = FORCE_PRIORITY_3;
+
+ runInfo.gws0 = arg.output.X().v;
+ runInfo.gws1 = arg.output.Y().v;
+ runInfo.gws2 = of_threads_per_batch * ((arg.output.Batch().v+3) / 4);
+
+ runInfo.lws0 = 1;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = sub_group_size;
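+        // A sketch of the dispatch layout, derived from the assignments above:
+        // each XY position spawns one thread per output feature, rounded up to the
+        // sub-group size of 8, and batches are processed in quads, e.g. for
+        // OFM=24 and batch=8: gws2 = RoundUp(24, 8) * ((8 + 3) / 4) = 24 * 2 = 48.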
+
+ return runInfo;
+ }
+
+ JitConstants ConvolutionKernel_mmad_batched::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+ {
+ auto jit = Parent::GetJitConstants(params, runInfo);
+
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+
+ // pitch for special block format used in this kernel
+ const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
+ const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
+ jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
+
+ const size_t in_x_pitch = 32 * 4;
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
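+        // Worked example of the fs_bs_yx_bsv4_fsv32 pitch arithmetic above, assuming
+        // an unpadded 2x2 input with batch=4: each block holds 4 batches x 32 features,
+        // so in_x_pitch = 128, in_y_pitch = 128 * 2 = 256, in_b_block_pitch = 256 * 2
+        // = 512, in_f_block_pitch = 512 * ((4 + 3) / 4) = 512, and in_offset = 0.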
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+ return jit;
+ }
+
+ KernelsData ConvolutionKernel_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ KernelsData kd = GetCommonKernelsData(params, options);
+ if(!kd.empty())
+ kd[0].estimatedTime = FORCE_PRIORITY_3;
+ return kd;
+ }
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h
new file mode 100644
index 000000000..8a3dda451
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h
@@ -0,0 +1,43 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+ class ConvolutionKernel_mmad_batched : public ConvolutionKernelBase
+ {
+ public:
+ using Parent = ConvolutionKernelBase;
+ ConvolutionKernel_mmad_batched() : ConvolutionKernelBase("convolution_gpu_mmad_batched") {}
+ virtual ~ConvolutionKernel_mmad_batched() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+
+ protected:
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+ virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+ {
+ return{
+ WeightsLayout::os_is_yx_isa8_osv8_isv4,
+ };
+ }
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
index 21d9c92ff..aa5850593 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
@@ -1,5 +1,5 @@
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -35,6 +35,8 @@
#include "convolution_kernel_MMAD_blocks.h"
#include "convolution_kernel_1x1_gemm_MMAD.h"
#include "convolution_kernel_byxf_af32_depthwise.h"
+#include "convolution_kernel_mmad_batched.h"
+#include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
#include <iostream>
@@ -61,6 +63,8 @@ namespace kernel_selector
Attach<ConvolutionKernel_MMAD_blocks>();
Attach<ConvolutionKernel_1x1_gemm_MMAD>();
Attach<ConvolutionKernel_byxf_af32_depthiwise>();
+ Attach<ConvolutionKernel_mmad_batched>();
+ Attach<ConvolutionKernel_bfyx_depthwise_weights_lwg>();
//Attach<ConvolutionKernel_Tutorial>(); //In order to use this implementation for tutorial purposes please uncomment this line
}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h
index 9275a5fcb..6b4c756d0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h
@@ -17,7 +17,6 @@
#pragma once
#include "kernel_selector.h"
-#include "kernel_runner_interface.h"
namespace kernel_selector
{
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp
index b6065edc6..13f8ba40b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp
@@ -22,11 +22,8 @@ namespace kernel_selector
ParamsKey ConvolutionGradWeightsKernel1x1::GetSupportedKey() const
{
ParamsKey k;
- k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
- k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp
new file mode 100644
index 000000000..c892f6d2d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp
@@ -0,0 +1,75 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_grad_weights_kernel_3x3.h"
+
+namespace kernel_selector
+{
+
+ ParamsKey ConvolutionGradWeightsKernel3x3::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputWeightsType(WeightsType::F32);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::byxf);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableMomentum();
+ k.EnableBatching();
+ k.EnableSplitSupport();
+ k.EnableGradient();
+ k.DisableTuning();
+ return k;
+ }
+
+ bool ConvolutionGradWeightsKernel3x3::Validate(const Params& p, const optional_params&) const
+ {
+ const auto& params = static_cast<const convolution_grad_weights_params&>(p);
+
+ if (params.stride.x != 1 || params.stride.y != 1)
+ return false;
+ if (params.filterSize.x != 3 || params.filterSize.y != 3)
+ return false;
+ return true;
+ }
+
+ ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel3x3::SetDefault(const convolution_grad_weights_params& params) const
+ {
+ auto input_features = params.weights.IFM().v;
+ auto output_features = params.weights.OFM().v;
+
+ DispatchData kd;
+
+ kd.gws0 = Align(output_features, 16);
+ kd.gws1 = input_features;
+ kd.gws2 = 1;
+ kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (kd.gws0 % kd.lws0 != 0)
+ {
+ kd.lws0 -= 16;
+ }
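+        // gws0 is aligned to 16 above, so this loop always terminates at a divisor
+        // of gws0 (32 when gws0 is a multiple of 32, otherwise 16).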
+ kd.lws1 = 1;
+ kd.lws2 = 1;
+ kd.effiency = FORCE_PRIORITY_8;
+ return kd;
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h
new file mode 100644
index 000000000..39fcb7e96
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h
@@ -0,0 +1,33 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_grad_weights_kernel_base.h"
+
+namespace kernel_selector {
+
+ class ConvolutionGradWeightsKernel3x3 : public ConvolutionGradWeightsKernelBase
+ {
+ public:
+ ConvolutionGradWeightsKernel3x3() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_3x3") {}
+ virtual ~ConvolutionGradWeightsKernel3x3() {}
+
+ virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
+ virtual bool Validate(const Params& p, const optional_params& o) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp
new file mode 100644
index 000000000..8bd5000d6
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp
@@ -0,0 +1,73 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_grad_weights_kernel_7x7.h"
+
+namespace kernel_selector
+{
+
+ ParamsKey ConvolutionGradWeightsKernel7x7::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputWeightsType(WeightsType::F32);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::byxf);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableMomentum();
+ k.EnableBatching();
+ k.EnableSplitSupport();
+ k.EnableGradient();
+ k.DisableTuning();
+ return k;
+ }
+
+ bool ConvolutionGradWeightsKernel7x7::Validate(const Params& p, const optional_params&) const
+ {
+ const auto& params = static_cast<const convolution_grad_weights_params&>(p);
+
+ if (params.filterSize.x != 7 || params.filterSize.y != 7)
+ return false;
+ return true;
+ }
+
+ ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel7x7::SetDefault(const convolution_grad_weights_params& params) const
+ {
+ auto input_features = params.weights.IFM().v;
+ auto output_features = params.weights.OFM().v;
+
+ DispatchData kd;
+
+ kd.gws0 = 8;
+ kd.gws1 = Align(output_features, 16);
+ kd.gws2 = input_features;
+ kd.lws0 = 1;
+ kd.lws1 = std::min(std::max(kd.gws1, static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (kd.gws1 % kd.lws1 != 0)
+ {
+ kd.lws1 -= 16;
+ }
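+        // As in the 3x3 variant, gws1 is 16-aligned, so lws1 settles on 16 or 32.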
+ kd.lws2 = 1;
+ kd.effiency = FORCE_PRIORITY_8;
+ return kd;
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h
new file mode 100644
index 000000000..286caf5c0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h
@@ -0,0 +1,33 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_grad_weights_kernel_base.h"
+
+namespace kernel_selector {
+
+ class ConvolutionGradWeightsKernel7x7 : public ConvolutionGradWeightsKernelBase
+ {
+ public:
+ ConvolutionGradWeightsKernel7x7() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_7x7") {}
+ virtual ~ConvolutionGradWeightsKernel7x7() {}
+
+ virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
+ virtual bool Validate(const Params& p, const optional_params& o) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
index 2998075b7..1e2cd30a8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
@@ -124,7 +124,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
if (newParams.use_momentum)
{
kernel.arguments.push_back({ ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp
index 7ca7c9a39..3c29a7616 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp
@@ -24,7 +24,6 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp
index 16b842e9e..fb045a273 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp
@@ -18,6 +18,8 @@
#include "convolution_grad_weights_kernel_ref.h"
#include "convolution_grad_weights_kernel_1x1.h"
#include "convolution_grad_weights_kernel_yxfb.h"
+#include "convolution_grad_weights_kernel_3x3.h"
+#include "convolution_grad_weights_kernel_7x7.h"
namespace kernel_selector
{
@@ -26,6 +28,8 @@ namespace kernel_selector
Attach<ConvolutionGradWeightsKernelRef>();
Attach<ConvolutionGradWeightsKernel1x1>();
Attach<ConvolutionGradWeightsKernel_yxfb>();
+ Attach<ConvolutionGradWeightsKernel3x3>();
+ Attach<ConvolutionGradWeightsKernel7x7>();
}
KernelsData convolution_grad_weights_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp
index e6fb6d026..f53d51b39 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp
@@ -24,7 +24,6 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
- k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::yxfb);
@@ -64,11 +63,11 @@ namespace kernel_selector
DispatchData kd;
- kd.gws0 = 32;
+ kd.gws0 = 16;
kd.gws1 = input_features * output_features;
kd.gws2 = x * y;
- kd.lws0 = 32;
+ kd.lws0 = 16;
kd.lws1 = 1;
kd.lws2 = 1;
kd.effiency = FORCE_PRIORITY_7;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
index 5369d7ac0..cbc0bd780 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
@@ -57,6 +57,7 @@ namespace kernel_selector
MakeJitConstant("FILTER_ARRAY_NUM", dp.split),
MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", dp.depthwiseSeparableOpt),
+ MakeJitConstant("FUSED_ELTWISE", dp.fused_eltwise)
});
return jit;
@@ -114,13 +115,15 @@ namespace kernel_selector
return{};
}
- auto cldnn_jit = GetJitConstants(orgParams);
- auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty());
kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
+ if (orgParams.fused_eltwise)
+ kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
index ac5baec0f..206614a70 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
@@ -34,6 +34,7 @@ namespace kernel_selector
uSize padding;
uint32_t split = 1;
bool depthwiseSeparableOpt = false;
+ bool fused_eltwise = false;
virtual std::string to_string() const override;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
index 613fbb4f8..5feac0ca5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
@@ -41,6 +41,22 @@ namespace kernel_selector
}
}
+ ParamsKey eltwise_params::GetParamsKey() const
+ {
+ ParamsKey k = base_params::GetParamsKey();
+ if (int8_quantization)
+ {
+ k.EnableInt8Quantization();
+ }
+
+ if (output_calibration)
+ {
+ k.EnableOutputCalibration();
+ }
+
+ return k;
+ }
+
bool EltwiseKernelBase::Validate(const Params& p, const optional_params& o) const
{
if (p.GetType() != KernelType::ELTWISE ||
@@ -56,7 +72,7 @@ namespace kernel_selector
return false;
}
- auto& operations = params.eltwiseParams.operations;
+ auto& operations = params.operations;
if (operations.size() == 0)
{
@@ -91,24 +107,24 @@ namespace kernel_selector
JitConstants jit = MakeBaseParamsJitConstants(params);
jit.AddConstants({
- MakeJitConstant("ELTWISE_LAYOUT_BASED", params.eltwiseParams.layoutBased),
- MakeJitConstant("QUANTIZATION_TERM", params.eltwiseParams.int8_quantization),
+ MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
+ MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
});
- if (params.eltwiseParams.int8_quantization)
+ if (params.int8_quantization)
{
- if (params.eltwiseParams.output_calibration)
+ if (params.output_calibration)
{
- jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.eltwiseParams.output_calibration));
+ jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
}
else
- jit.AddConstants({ MakeJitConstant("O_QF", params.eltwiseParams.output_quantization_factor) });
+ jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) });
}
std::string inputs_decls, vload_decls;
- auto& updateInputs = params.eltwiseParams.updateInputIds;
+ auto& updateInputs = params.updateInputIds;
for (size_t i = 0; i < params.inputs.size(); i++)
{
@@ -143,8 +159,8 @@ namespace kernel_selector
std::string do_eltwise;
- auto& operations = params.eltwiseParams.operations;
- auto& coefficients = params.eltwiseParams.coefficients;
+ auto& operations = params.operations;
+ auto& coefficients = params.coefficients;
for (size_t op_num = 0; op_num < operations.size(); op_num++)
{
@@ -187,7 +203,7 @@ namespace kernel_selector
cast_type = "(MAKE_VECTOR_TYPE(UNIT_TYPE, 8))";
op = "const MAKE_VECTOR_TYPE(UNIT_TYPE, 8) tmp" + op_num_str + " = ";
}
- else if(params.eltwiseParams.int8_quantization)
+ else if(params.int8_quantization)
{
cast_type = "(int)";
op = "const int tmp" + op_num_str + " = ";
@@ -251,7 +267,7 @@ namespace kernel_selector
jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
- if (params.eltwiseParams.layoutBased || params.eltwiseParams.int8_quantization)
+ if (params.layoutBased || params.int8_quantization)
{
jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
}
@@ -264,32 +280,27 @@ namespace kernel_selector
return GetJitConstantsCommon(params, false);
}
- KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const
+ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_params& params) const
{
- if (!Validate(params, options))
- {
- return{};
- }
-
- KernelData kd = KernelData::Default<eltwise_params>(params);
- eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
+ DispatchData kd;
- auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
- auto cldnn_jit = GetJitConstants(newParams);
- std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
-
- const auto& out = newParams.output;
- auto& kernel = kd.kernels[0];
- if (newParams.eltwiseParams.layoutBased || newParams.eltwiseParams.int8_quantization)
+ if (params.layoutBased || params.int8_quantization)
{
- kernel.workGroups.global = GetTensorFriendlyWorkGroups(newParams.inputs[0]);
+ auto global = GetTensorFriendlyWorkGroups(params.inputs[0]);
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
}
- else if (CheckInputsOutputNoPitchSameDims(newParams))
+ else if (CheckInputsOutputNoPitchSameDims(params))
{
- kernel.workGroups.global = { newParams.inputs[0].LogicalSize(), 1, 1 };
+ kd.gws0 = params.inputs[0].LogicalSize();
+ kd.gws1 = 1;
+ kd.gws2 = 1;
}
else
{
+ const auto& out = params.output;
+
std::vector<size_t> gws;
for (const auto& o : out.GetDims())
{
@@ -301,11 +312,42 @@ namespace kernel_selector
gws.push_back(1U);
}
- kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
+ kd.gws0 = gws[0];
+ kd.gws1 = gws[1];
+ kd.gws2 = gws[2] * gws[3];
}
- kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
- kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.eltwiseParams.int8_quantization, newParams.eltwiseParams.output_calibration);
+
+ auto local = GetOptimalLocalWorkGroupSizes( { kd.gws0, kd.gws1, kd.gws2 } );
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const
+ {
+ if (!Validate(params, options))
+ {
+ return{};
+ }
+
+ KernelData kd = KernelData::Default<eltwise_params>(params);
+ eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
+
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ DispatchData runInfo = SetDefault(newParams);
+
+ auto& kernel = kd.kernels[0];
+
+ kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
+ kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
+
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+ kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.int8_quantization, newParams.output_calibration);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
index 9d7127822..161140849 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
@@ -25,7 +25,7 @@ namespace kernel_selector
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct eltwise_params : public base_params
{
- eltwise_params() : base_params(KernelType::ELTWISE), eltwiseParams() {}
+ eltwise_params() : base_params(KernelType::ELTWISE) {}
struct InputType
{
@@ -87,35 +87,17 @@ namespace kernel_selector
uint32_t tmpId;
};
- struct DedicatedParams
- {
- std::vector<eltwise_params::Node> operations;
- std::vector<float> coefficients;
- std::vector<UpdateInputData> updateInputIds;
- bool layoutBased = false;
- bool int8_quantization = false;
- bool output_calibration = false;
- float output_quantization_factor = 1.0f;
- };
-
- DedicatedParams eltwiseParams;
+ std::vector<eltwise_params::Node> operations;
+ std::vector<float> coefficients;
+ std::vector<UpdateInputData> updateInputIds;
+
+ bool layoutBased = false;
+ bool int8_quantization = false;
+ bool output_calibration = false;
+ float output_quantization_factor = 1.0f;
+
MultiDataTensor output_calibration_factors;
-
- virtual ParamsKey GetParamsKey() const
- {
- ParamsKey k = base_params::GetParamsKey();
- if (eltwiseParams.int8_quantization)
- {
- k.EnableInt8Quantization();
- }
-
- if (eltwiseParams.output_calibration)
- {
- k.EnableOutputCalibration();
- }
-
- return k;
- }
+ virtual ParamsKey GetParamsKey() const;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -141,6 +123,7 @@ namespace kernel_selector
protected:
virtual bool Validate(const Params& p, const optional_params& o) const override;
virtual JitConstants GetJitConstants(const eltwise_params& params) const;
+ virtual DispatchData SetDefault(const eltwise_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
};
}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
new file mode 100644
index 000000000..571a013ce
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
@@ -0,0 +1,222 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+ ParamsKey EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ k.EnableInt8Quantization();
+ k.EnableOutputCalibration();
+ return k;
+ }
+
+ EltwiseKernelBase::DispatchData EltwiseKernel_fs_bs_yx_bsv4_fsv32::SetDefault(const eltwise_params& params) const
+ {
+ DispatchData kd;
+
+ kd.gws0 = params.output.X().v;
+ kd.gws1 = params.output.Y().v;
+ // we process 4 batches and 4 features per workitem
+ kd.gws2 = (params.output.Batch().v / 4) * (params.output.Feature().v / 4);
+ kd.lws0 = 1;
+ kd.lws1 = 1;
+ kd.lws2 = 8;
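+        // Dispatch sketch: with bsv4_fsv32 blocking each work-item covers a 4-batch
+        // x 4-feature tile, e.g. batch=8, features=32 gives gws2 = (8/4) * (32/4) = 16,
+        // executed in sub-groups of 8 (lws2).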
+
+ return kd;
+ }
+
+ JitConstants EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetJitConstants(const eltwise_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ const size_t in_x_pitch = 32 * 4;
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
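+        // Block pitches for fs_bs_yx_bsv4_fsv32: 4 batches x 32 features per block,
+        // hence the 32 * 4 element x-pitch.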
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+ ///////////////
+ jit.AddConstants({
+ MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
+ MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
+ });
+
+ if (params.int8_quantization)
+ {
+ if (params.output_calibration)
+ {
+ jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
+ jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
+
+ }
+ else
+ jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) });
+ }
+
+ std::string inputs_decls;
+ auto& updateInputs = params.updateInputIds;
+
+ for (size_t i = 0; i < params.inputs.size(); i++)
+ {
+            // 'const' is added only to inputs that will not be updated
+ std::string const_str = "const";
+ for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
+ {
+ if (updateInputs[update_input_idx].inputId == i)
+ {
+ const_str = "";
+ break;
+ }
+ }
+
+ inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
+ }
+
+ jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
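+        // For a typical two-input INT8 eltwise this yields, e.g.:
+        // "const __global char* input0, const __global char* input1, "
+        // (the exact element type name comes from toCLType).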
+ jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));
+
+ std::string do_eltwise;
+
+ auto& operations = params.operations;
+ auto& coefficients = params.coefficients;
+
+ for (size_t op_num = 0; op_num < operations.size(); op_num++)
+ {
+ const std::string op_num_str = std::to_string(op_num);
+ const auto& ew = operations[op_num];
+
+ for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++)
+ {
+ const auto& input = ew.inputs[input_idx];
+ const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);
+ switch (input.mode)
+ {
+ case EltwiseInputMode::SCALAR:
+ jit.AddConstant(MakeJitConstant(name, input.scalar));
+ break;
+ case EltwiseInputMode::INPUT_BUFFER:
+ jit.AddConstant(MakeJitConstant(name, "GET_INPUT(input" + std::to_string(input.index) + ", INPUT" + std::to_string(input.index) + ")"));
+ break;
+ case EltwiseInputMode::OUTPUT_BUFFER:
+ jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
+ break;
+ case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER:
+ jit.AddConstant(MakeJitConstant(name, "input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
+ break;
+ case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX:
+ jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
+ break;
+ default:
+ break;
+ }
+ }
+ std::string input0_str, input1_str, cast_type, op;
+
+ if (params.int8_quantization)
+ {
+ cast_type = "(int16)";
+ op = "const int16 tmp" + op_num_str + " = ";
+ }
+ else
+ {
+ cast_type = "(UNIT_TYPE)";
+ op = "const UNIT_TYPE tmp" + op_num_str + " = ";
+ }
+
+ input0_str = cast_type + "INPUT_" + op_num_str + "_0";
+ input1_str = cast_type + "INPUT_" + op_num_str + "_1";
+
+ if (ew.mode == EltwiseMode::ADD)
+ {
+ std::vector<std::string> coeff_strings(ew.inputs.size(), "");
+ for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++)
+ {
+ const auto& input = ew.inputs[input_idx];
+ if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size())
+ {
+ const float c = coefficients[input.index];
+ if (c != 1.0f)
+ coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
+ }
+ }
+
+ input0_str = coeff_strings[0] + input0_str;
+ input1_str = coeff_strings[1] + input1_str;
+ }
+
+
+ switch (ew.mode)
+ {
+ case EltwiseMode::ADD: op += input0_str + " + " + input1_str; break;
+ case EltwiseMode::SUB: op += input0_str + " - " + input1_str; break;
+ case EltwiseMode::MUL: op += input0_str + " * " + input1_str; break;
+ case EltwiseMode::DIV: op += input0_str + " / " + input1_str; break;
+ case EltwiseMode::MODULU: op += cast_type + "fmod(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::MIN: op += cast_type + "fmin(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::MAX: op += cast_type + "fmax(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::POW: op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::SQRT: op += cast_type + "sqrt(" + input0_str + ")"; break;
+ case EltwiseMode::RSQRT: op += cast_type + "1/sqrt(" + input0_str + ")"; break;
+ case EltwiseMode::ASSIGN: op += input0_str; break;
+ default:
+ break;
+ }
+
+ std::string opname = "OPERATION" + op_num_str;
+ jit.AddConstant(MakeJitConstant(opname, op));
+ do_eltwise += "\\\n\t" + opname + ";";
+ }
+
+ for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
+ do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) +
+ "[GET_INDEX(INPUT, " + std::to_string(updateInputs[update_input_idx].inputId) +
+ ")] = tmp" + std::to_string(updateInputs[update_input_idx].tmpId) + ";";
+
+ do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
+
+ jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
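+        // For a single quantized ADD operation the generated macro body reads:
+        //   const int16 tmp0 = (int16)INPUT_0_0 + (int16)INPUT_0_1;
+        //   res = tmp0;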
+
+ if (params.layoutBased || params.int8_quantization)
+ {
+ jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
+ }
+
+ ///////////////
+ return jit;
+ }
+
+ KernelsData EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options);
+ }
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
new file mode 100644
index 000000000..b1fb3e950
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "eltwise_kernel_base.h"
+
+namespace kernel_selector
+{
+ class EltwiseKernel_fs_bs_yx_bsv4_fsv32 : public EltwiseKernelBase
+ {
+ public:
+ EltwiseKernel_fs_bs_yx_bsv4_fsv32() : EltwiseKernelBase("eltwise_fs_bs_yx_bsv4_fsv32") {}
+ virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ protected:
+ JitConstants GetJitConstants(const eltwise_params& params) const override;
+ virtual DispatchData SetDefault(const eltwise_params& params) const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
index 3840f463e..3a7776575 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
@@ -22,8 +22,16 @@ namespace kernel_selector {
ParamsKey EltwiseKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
@@ -35,6 +43,25 @@ namespace kernel_selector {
return k;
}
+ bool EltwiseKernelRef::Validate(const Params& p, const optional_params& o) const
+ {
+ if (!EltwiseKernelBase::Validate(p, o))
+ {
+ return false;
+ }
+
+ const eltwise_params& params = static_cast<const eltwise_params&>(p);
+ for (size_t i = 0; i < params.inputs.size(); i++)
+ {
+ if (params.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+ }
+ if (params.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+
+ return true;
+ }
+
KernelsData EltwiseKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
{
return GetCommonKernelsData(params, options);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
index 2249dc8c9..c2ccf054d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
@@ -28,5 +28,8 @@ namespace kernel_selector
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
virtual ParamsKey GetSupportedKey() const override;
+ protected:
+ bool Validate(const Params& p, const optional_params& o) const override;
+
};
}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
index d71deddfb..cf7565216 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
@@ -17,13 +17,15 @@
#include "eltwise_kernel_selector.h"
#include "eltwise_kernel_ref.h"
#include "eltwise_kernel_vload8.h"
-
+#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
+
namespace kernel_selector
{
eltwise_kernel_selector::eltwise_kernel_selector()
{
Attach<EltwiseKernelRef>();
Attach<EltwiseKernel_vload8>();
+ Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>();
}
KernelsData eltwise_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
index 934bc44cd..5ceb75084 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
@@ -45,6 +45,15 @@ namespace kernel_selector {
}
const auto& ewParams = static_cast<const eltwise_params&>(params);
+
+ for (size_t i = 0; i < ewParams.inputs.size(); i++)
+ {
+ if (ewParams.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+ }
+ if (ewParams.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+
const auto& output = ewParams.output;
const auto count = output.PhysicalSize();
@@ -62,16 +71,16 @@ namespace kernel_selector {
//TODO: add support to this implementation when user requests input values updates
bool bCheckUpdateInput = true;
- if (!ewParams.eltwiseParams.updateInputIds.empty())
+ if (!ewParams.updateInputIds.empty())
bCheckUpdateInput = false;
//TODO: add support for reading from output buffer and using its values in computation
bool bCheckUseOutput = true;
- for (size_t op = 0; op < ewParams.eltwiseParams.operations.size(); op++)
+ for (size_t op = 0; op < ewParams.operations.size(); op++)
{
- for (size_t input_idx = 0; input_idx < ewParams.eltwiseParams.operations[op].inputs.size(); input_idx++)
+ for (size_t input_idx = 0; input_idx < ewParams.operations[op].inputs.size(); input_idx++)
{
- if (ewParams.eltwiseParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER)
+ if (ewParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER)
{
bCheckUseOutput = false;
break;
@@ -114,7 +123,7 @@ namespace kernel_selector {
auto& kernel = kd.kernels[0];
kernel.workGroups.global = { std::max(newParams.inputs[0].LogicalSize()/8, (size_t)1), 1, 1 };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
kd.estimatedTime = FORCE_PRIORITY_8;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
index d4c9b126c..f126daa94 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
@@ -103,7 +103,7 @@ namespace kernel_selector
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty());
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
index 6702ebc69..20e6e8dca 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
@@ -49,7 +49,7 @@ namespace kernel_selector
std::unique_ptr<FullyConnectedKernelBase::DispatchData> FullyConnectedKernelBase::SetDefault(const fully_connected_params& params) const
{
- std::unique_ptr<DispatchData> dispatchData = std::make_unique<DispatchData>();
+ std::unique_ptr<DispatchData> dispatchData = std::unique_ptr<DispatchData>(new DispatchData());
dispatchData->fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
// Determine global work sizes.
@@ -122,10 +122,10 @@ namespace kernel_selector
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, *runInfo.get(), kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
+ FillCLKernelData(kernel, *runInfo.get(), params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
kd.estimatedTime = estimated_time;
kd.autoTuneIndex = -1;
return{ kd };
}
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
index 0f836133b..b98b528a8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
@@ -60,7 +60,7 @@ namespace kernel_selector
std::unique_ptr<FullyConnected_bs_f_bsv16_b1::FullyConnectedKernelBase::DispatchData> FullyConnected_bs_f_bsv16_b1::SetDefault(const fully_connected_params& arg) const
{
- auto run_info = std::make_unique<DispatchData>(*FullyConnectedKernelBase::SetDefault(arg).get());
+ auto run_info = std::unique_ptr<DispatchData>(new DispatchData(*FullyConnectedKernelBase::SetDefault(arg)));
// Properties of chunk and unit.
const char* chunk_type = "uint";
@@ -100,4 +100,4 @@ namespace kernel_selector
{
return GetCommonKernelsData(params, optParams, DataLayout::bf, {WeightsLayout::os_i_osv16}, FORCE_PRIORITY_5);
}
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
index 1ceb3ebc9..b32c8a54e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
@@ -37,7 +37,7 @@ namespace kernel_selector
std::unique_ptr<FullyConnected_fb_io_block::FullyConnectedKernelBase::DispatchData> FullyConnected_fb_io_block::SetDefault(const fully_connected_params& arg) const
{
- auto kd = std::make_unique<DispatchData>(*FullyConnectedKernelBase::SetDefault(arg).get());
+ auto kd = std::unique_ptr<DispatchData>(new DispatchData(*FullyConnectedKernelBase::SetDefault(arg)));
const auto& output = arg.output;
auto batch_size = output.Batch().v;
@@ -146,4 +146,4 @@ namespace kernel_selector
// return GetCommonKernelsData(params, optParams, DataLayout::fb, WeightsLayout::io, estimated_time);
return GetCommonKernelsData(params, optParams, DataLayout::yxfb, { WeightsLayout::yxio }, estimated_time);
}
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp
new file mode 100644
index 000000000..46e4dea8d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp
@@ -0,0 +1,117 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fully_connected_kernel_mmad_batched.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+ ParamsKey FullyConnected_mmad_batched::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::bf);
+ k.EnableBiasPerOutput();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ k.EnableInt8Quantization();
+ k.EnableOutputCalibration();
+ return k;
+ }
+
+ bool FullyConnected_mmad_batched::Validate(const Params& p, const optional_params& o) const
+ {
+ if (!FullyConnectedKernelBase::Validate(p, o))
+ {
+ return false;
+ }
+
+ const auto& params = static_cast<const fully_connected_params&>(p);
+
+ // we do not support padded input
+ if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0)
+ return false;
+
+ size_t batch = params.inputs[0].Batch().v;
+ // batch must be a multiple of 8
+ if (batch % 8 != 0)
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ JitConstants FullyConnected_mmad_batched::GetJitConstants(const fully_connected_params& params, const DispatchData& runInfo) const
+ {
+ auto jit = Parent::GetJitConstants(params, runInfo);
+
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));
+
+ // pitch for special block format used in this kernel
+ const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
+ const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
+ jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
+
+ const size_t in_x_pitch = 32 * 4;
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
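+        // Same fs_bs_yx_bsv4_fsv32 block-pitch arithmetic as in
+        // convolution_kernel_mmad_batched.cpp; see the worked example there.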
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+ return jit;
+ }
+
+ std::unique_ptr<FullyConnected_mmad_batched::Parent::DispatchData> FullyConnected_mmad_batched::SetDefault(const fully_connected_params& params) const
+ {
+ auto runInfo = Parent::SetDefault(params);
+
+ constexpr size_t sub_group_size = 8;
+
+ const auto of_maps = params.output.Feature().v;
+ const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
+
+ runInfo->gws0 = params.output.Batch().v / 8; // we process 8 batches in a single WG
+ runInfo->gws1 = of_threads_per_batch;
+ runInfo->gws2 = 1;
+
+ runInfo->lws0 = 1;
+ runInfo->lws1 = sub_group_size;
+ runInfo->lws2 = 1;
+
+ runInfo->effiency = FORCE_PRIORITY_1;
+ return std::move(runInfo);
+ }
+
+ KernelsData FullyConnected_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, DataLayout::fs_bs_yx_bsv4_fsv32,
+ { WeightsLayout::os_is_yx_isa8_osv8_isv4 }, FORCE_PRIORITY_1);
+ }
+} \ No newline at end of file
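Note: the pitch arithmetic in GetJitConstants above is easier to check in isolation. The following standalone sketch reproduces it with hypothetical shapes (ifm = 64, a 1x1 filter, a 7x7 input, batch = 8); Align is reimplemented locally, and LogicalDimPadded() is taken to equal the plain dimension since Validate rejects padded input:

    #include <cstddef>
    #include <iostream>

    // Round v up to the nearest multiple of m (mirrors kernel_selector's Align).
    static size_t Align(size_t v, size_t m) { return ((v + m - 1) / m) * m; }

    int main() {
        const size_t ifm = 64, filter_x = 1, filter_y = 1;  // hypothetical
        const size_t input_x = 7, input_y = 7, batch = 8;   // hypothetical

        // Weights: each OFM block spans ifm/32 slices of 4*8*8 values.
        const size_t ifm_32_aligned = Align(ifm, 32);                         // 64
        const size_t filter_ofm_block_pitch =
            (ifm_32_aligned / 32) * filter_x * filter_y * 4 * 8 * 8;          // 512

        // Input: 32 features x 4 batches are interleaved at every (x, y).
        const size_t in_x_pitch = 32 * 4;                                     // 128
        const size_t in_y_pitch = in_x_pitch * input_x;                       // 896
        const size_t in_b_block_pitch = in_y_pitch * input_y;                 // 6272
        const size_t in_f_block_pitch = in_b_block_pitch * ((batch + 3) / 4); // 12544

        std::cout << filter_ofm_block_pitch << ' ' << in_f_block_pitch << '\n';
        return 0;
    }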
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h
new file mode 100644
index 000000000..61af89f19
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h
@@ -0,0 +1,38 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fully_connected_kernel_base.h"
+
+namespace kernel_selector {
+
+ class FullyConnected_mmad_batched : public FullyConnectedKernelBase
+ {
+ public:
+ using Parent = FullyConnectedKernelBase;
+
+ FullyConnected_mmad_batched() : Parent("fully_connected_gpu_mmad_batched") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+
+ protected:
+ bool Validate(const Params& p, const optional_params& o) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const override;
+ };
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
index d7c1a1a85..529e1ca33 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
@@ -30,6 +30,7 @@
#include "fully_connected_kernel_bf_io_input_spatial.h"
#include "fully_connected_kernel_image_tutorial.h"
#include "fully_connected_kernel_MMAD.h"
+#include "fully_connected_kernel_mmad_batched.h"
namespace kernel_selector {
@@ -49,6 +50,7 @@ namespace kernel_selector {
Attach<FullyConnected_fb_io_b8_f8>();
Attach<FullyConnected_bf_io_input_spatial>();
Attach<FullyConnectedKernelMMAD>();
+ Attach<FullyConnected_mmad_batched>();
}
KernelsData fully_connected_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
index 94d0b4751..e40848af7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
@@ -80,7 +80,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = runInfo.effiency;
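Note: this is the first of many identical call-site updates in this patch. FillCLKernelData (and GetKernelString for the LSTM kernels) now takes params.engineInfo, so JIT generation can query device capabilities such as fp16 support (used below in reorder_kernel_base.cpp). The pattern at every call site:

    // Before: JIT generation had no access to device capabilities.
    FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point,
                     ROUND_ROBIN, true, !orgParams.bias.empty());
    // After: EngineInfo is threaded through alongside the dispatch data.
    FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit,
                     entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());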
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
index 55d8bdf88..67328ac99 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
@@ -82,7 +82,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
if (orgParams.use_momentum)
{
kernel.arguments.push_back({ ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp
index 9ec0f8e17..bb8380457 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp
@@ -24,7 +24,6 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp
new file mode 100644
index 000000000..12af8a1c5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp
@@ -0,0 +1,98 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gemm_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+ JitConstants GemmKernelBase::GetJitConstants(const gemm_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ jit.AddConstants({
+ MakeJitConstant("X1", params.inputs[0].X().v),
+ MakeJitConstant("Y1", params.inputs[0].Y().v),
+ MakeJitConstant("X2", params.inputs[1].X().v),
+ MakeJitConstant("Y2", params.inputs[1].Y().v),
+ MakeJitConstant("ALPHA", params.alpha),
+ MakeJitConstant("BETA", params.beta),
+ MakeJitConstant("TRANSPOSE_INPUT1", params.transpose_input1),
+ MakeJitConstant("TRANSPOSE_INPUT2", params.transpose_input2),
+ });
+
+ if (params.inputs.size() > 2)
+ {
+ jit.AddConstants({ MakeJitConstant("OUT_BIAS_TERM", true) });
+ }
+ else
+ jit.AddConstants({ MakeJitConstant("OUT_BIAS_TERM", false) });
+
+ return jit;
+ }
+
+ GemmKernelBase::DispatchData GemmKernelBase::SetDefault(const gemm_params& params) const
+ {
+ const auto& output = params.output;
+
+ DispatchData kd;
+
+ kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+ std::vector<size_t> global{ params.inputs[0].Y().v, params.inputs[1].X().v, output.Batch().v };
+
+ if (params.transpose_input1 && params.transpose_input2)
+ global = { params.inputs[0].X().v, params.inputs[1].Y().v, output.Batch().v };
+ else if (params.transpose_input1)
+ global = { params.inputs[0].X().v, params.inputs[1].X().v, output.Batch().v };
+ else if (params.transpose_input2)
+ global = { params.inputs[0].Y().v, params.inputs[1].Y().v, output.Batch().v };
+
+ const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
+
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+ {
+ assert(params.GetType() == KernelType::GEMM);
+
+ const auto& prim_params = static_cast<const gemm_params&>(params);
+
+ auto run_info = SetDefault(prim_params);
+ KernelData k_data = KernelData::Default<gemm_params>(params);
+
+ auto cldnn_jit = GetJitConstants(prim_params);
+ auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+ auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = k_data.kernels[0];
+ FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, false, false, (uint32_t)prim_params.inputs.size());
+
+ k_data.estimatedTime = estimated_time;
+
+ return { k_data };
+ }
+} \ No newline at end of file
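Note: SetDefault launches one work item per output element, and the transpose flags decide which input dimensions supply the output's rows and columns. The mapping, restated as a small standalone function (X() is width, Y() is height, as in the tensor API):

    #include <array>
    #include <cstddef>

    // Returns {gws0, gws1, gws2} for C = op(A) * op(B) over `batch` matrices.
    std::array<size_t, 3> GemmGlobal(size_t x1, size_t y1, size_t x2, size_t y2,
                                     size_t batch, bool t1, bool t2) {
        if (t1 && t2) return { x1, y2, batch };  // A^T * B^T
        if (t1)       return { x1, x2, batch };  // A^T * B
        if (t2)       return { y1, y2, batch };  // A   * B^T
        return { y1, x2, batch };                // A   * B
    }

In every branch the first component is op(A)'s row count and the second is op(B)'s column count, i.e. the output height and width.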
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h
new file mode 100644
index 000000000..643a0bec7
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // gemm_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct gemm_params : public base_params
+ {
+ gemm_params() :
+ base_params(KernelType::GEMM),
+ alpha(1.0f),
+ beta(0.0f),
+ transpose_input1(false),
+ transpose_input2(false)
+ {}
+
+ float alpha;
+ float beta;
+ bool transpose_input1;
+ bool transpose_input2;
+
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // gemm_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct gemm_optional_params : optional_params
+ {
+ gemm_optional_params()
+ : optional_params(KernelType::GEMM)
+ {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // GemmKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class GemmKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+
+ using DispatchData = CommonDispatchData;
+
+ protected:
+ JitConstants GetJitConstants(const gemm_params& params) const;
+ DispatchData SetDefault(const gemm_params& params) const;
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+ };
+}
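Note: a sketch of how a caller might populate gemm_params for C = alpha * op(A) * op(B) + beta * C. Field names come from the struct above; the DataTensor setup is elided because it lies outside this diff:

    kernel_selector::gemm_params params;
    params.alpha = 1.0f;             // scales op(A) * op(B)
    params.beta  = 0.0f;             // scales the optional third input
    params.transpose_input1 = false;
    params.transpose_input2 = true;  // treat input 2 as transposed
    // params.inputs / params.output are DataTensors filled by the caller;
    // a third input makes GetJitConstants define OUT_BIAS_TERM as true.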
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp
new file mode 100644
index 000000000..585d9d90d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp
@@ -0,0 +1,41 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gemm_kernel_ref.h"
+
+namespace kernel_selector
+{
+ ParamsKey GemmKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::bfyx);
+
+ k.EnableBatching();
+
+ return k;
+ }
+
+ KernelsData GemmKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+ }
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h
new file mode 100644
index 000000000..89727597d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "gemm_kernel_base.h"
+
+
+namespace kernel_selector
+{
+ class GemmKernelRef : public GemmKernelBase
+ {
+ public:
+ GemmKernelRef() : GemmKernelBase("gemm_ref") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp
new file mode 100644
index 000000000..a31f3cb9f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gemm_kernel_selector.h"
+#include "gemm_kernel_ref.h"
+
+namespace kernel_selector
+{
+ gemm_kernel_selector::gemm_kernel_selector()
+ {
+ Attach<GemmKernelRef>();
+ }
+
+ KernelsData gemm_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::GEMM);
+ }
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h
new file mode 100644
index 000000000..7a7896afd
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+ class gemm_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static gemm_kernel_selector &Instance() {
+ static gemm_kernel_selector instance;
+ return instance;
+ }
+
+ gemm_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+} \ No newline at end of file
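Note: both new selectors follow the singleton pattern already used across kernel_selector. Typical usage (a sketch):

    using namespace kernel_selector;
    gemm_params params;            // filled in by the caller
    gemm_optional_params options;
    KernelsData best =
        gemm_kernel_selector::Instance().GetBestKernels(params, options);
    // GetNaiveBestKernel keeps only attached kernels whose ParamsKey supports
    // `params` and ranks the candidates by their estimated-time priority.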
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp
new file mode 100644
index 000000000..c0dc0851c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp
@@ -0,0 +1,86 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "index_select_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+ JitConstants IndexSelectKernelBase::GetJitConstants(const index_select_params& params)
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ jit.AddConstant(MakeJitConstant(toString(params.axis), ""));
+
+ return jit;
+ }
+
+ IndexSelectKernelBase::DispatchData IndexSelectKernelBase::SetDefault(const index_select_params& params)
+ {
+ const auto& output = params.output;
+ const auto& indices = params.inputs.at(1);
+ DispatchData kd;
+
+ kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+ std::vector<size_t> global;
+ if (params.axis == IndexSelectAxis::BATCH)
+ {
+ global = { 1, indices.X().v, output.Feature().v };
+ }
+ else if (params.axis == IndexSelectAxis::X || params.axis == IndexSelectAxis::Y)
+ {
+ global = { output.Batch().v, indices.X().v, output.Feature().v };
+ }
+ else if (params.axis == IndexSelectAxis::FEATURE)
+ {
+ global = { output.Batch().v, indices.X().v, output.Y().v };
+ }
+ const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
+
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData IndexSelectKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+ {
+ assert(params.GetType() == KernelType::INDEX_SELECT);
+
+ const auto& prim_params = static_cast<const index_select_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+
+ auto run_info = SetDefault(prim_params);
+ KernelData k_data = KernelData::Default<index_select_params>(params);
+
+ auto cldnn_jit = GetJitConstants(prim_params);
+ auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+ auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = k_data.kernels[0];
+ FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, false, false, (uint32_t)prim_params.inputs.size());
+
+ k_data.estimatedTime = estimated_time;
+
+ return {k_data};
+ }
+}
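Note: the dispatch replaces the gathered axis with one work item per index; the indices tensor is one-dimensional along X. Restated as a standalone sketch:

    #include <array>
    #include <cstddef>

    enum class Axis { BATCH, FEATURE, X, Y };

    // Mirrors IndexSelectKernelBase::SetDefault's global work-size choice.
    std::array<size_t, 3> IndexSelectGlobal(Axis axis, size_t out_b, size_t out_f,
                                            size_t out_y, size_t num_indices) {
        switch (axis) {
        case Axis::BATCH:           return { 1, num_indices, out_f };
        case Axis::X: case Axis::Y: return { out_b, num_indices, out_f };
        case Axis::FEATURE:         return { out_b, num_indices, out_y };
        }
        return { 1, 1, 1 };  // unreachable: all axes handled above
    }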
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h
new file mode 100644
index 000000000..c7abe43bc
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h
@@ -0,0 +1,61 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // index_select_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct index_select_params : public base_params
+ {
+ index_select_params()
+ : base_params(KernelType::INDEX_SELECT)
+ {}
+
+ IndexSelectAxis axis = IndexSelectAxis::BATCH;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // index_select_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct index_select_optional_params : optional_params
+ {
+ index_select_optional_params()
+ : optional_params(KernelType::INDEX_SELECT)
+ {}
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // IndexSelectKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class IndexSelectKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+ virtual ~IndexSelectKernelBase() {}
+
+ using DispatchData = CommonDispatchData;
+
+ protected:
+ static JitConstants GetJitConstants(const index_select_params& params);
+ static DispatchData SetDefault(const index_select_params& params);
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp
new file mode 100644
index 000000000..b5ab92dc1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp
@@ -0,0 +1,58 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "index_select_kernel_ref.h"
+
+
+namespace kernel_selector
+{
+ ParamsKey IndexSelectKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+ k.EnableInputDataType(Datatype::INT32);
+
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+ k.EnableOutputDataType(Datatype::INT32);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::yxfb);
+
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+
+ k.EnableBatching();
+
+ k.EnableIndexSelectAxis(IndexSelectAxis::BATCH);
+ k.EnableIndexSelectAxis(IndexSelectAxis::FEATURE);
+ k.EnableIndexSelectAxis(IndexSelectAxis::Y);
+ k.EnableIndexSelectAxis(IndexSelectAxis::X);
+
+ k.EnableDifferentTypes();
+
+ return k;
+ }
+
+ KernelsData IndexSelectKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h
new file mode 100644
index 000000000..3dd16198f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "index_select_kernel_base.h"
+
+
+namespace kernel_selector
+{
+ class IndexSelectKernelRef : public IndexSelectKernelBase
+ {
+ public:
+ IndexSelectKernelRef() : IndexSelectKernelBase("index_select_gpu_ref") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp
new file mode 100644
index 000000000..3d1693046
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "index_select_kernel_selector.h"
+#include "index_select_kernel_ref.h"
+
+namespace kernel_selector
+{
+ index_select_kernel_selector::index_select_kernel_selector()
+ {
+ Attach<IndexSelectKernelRef>();
+ }
+
+ KernelsData index_select_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::INDEX_SELECT);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h
new file mode 100644
index 000000000..21363f9f5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+
+namespace kernel_selector
+{
+ class index_select_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static index_select_kernel_selector &Instance() {
+ static index_select_kernel_selector instance;
+ return instance;
+ }
+
+ index_select_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp
index 8176f002b..af6737941 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp
@@ -86,7 +86,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 2);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
kd.estimatedTime = FORCE_PRIORITY_9;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp
index de31047d1..bb3f20f7f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp
@@ -81,7 +81,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 2);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp
index 4907d9992..9165ea692 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp
@@ -103,7 +103,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entryPoint);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
index a74b21e27..6170abd46 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
@@ -77,7 +77,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
kernel.workGroups.global = { out.X().v, out.Batch().v, 1 };
- kernel.kernelString = GetKernelString(kernelName, jit, entryPoint);
+ kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
if (orgParams.has_cell) {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp
index 6afb8504c..a068f9ae4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp
@@ -22,8 +22,10 @@ namespace kernel_selector {
ParamsKey LSTMEltKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
index 6d2c9bcf0..703008546 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
@@ -35,6 +35,7 @@ namespace kernel_selector
}
jit.AddConstants({ MakeJitConstant("WEIGHTS", weights)});
+ jit.AddConstants({ MakeJitConstant("DIRECTION", params.direction)});
return jit;
}
@@ -64,7 +65,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
kernel.workGroups.global = { out.X().v, out.Batch().v, 1 };
- kernel.kernelString = GetKernelString(kernelName, jit, entryPoint);
+ kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
kernel.arguments.push_back({ ArgumentDescriptor::Types::WEIGHTS, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
index 6fd517586..e766120e0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
@@ -34,6 +34,7 @@ namespace kernel_selector
DataTensor hidden;
bool hasBias = false;
bool hasHidden = false;
+ uint32_t direction = 0;
void SetBias(const DataTensor& v) {
bias = v;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp
index 167afcb5f..6484dd951 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp
@@ -22,8 +22,10 @@ namespace kernel_selector {
ParamsKey LSTMGemmKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp
index faea3ea6e..d6e036f40 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp
@@ -91,7 +91,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp
index 23de0ff1a..4775a41b7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp
@@ -82,7 +82,7 @@ namespace kernel_selector
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, finalKernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entry_point);
kd.estimatedTime = estimated_time;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp
index a50849125..b4e4c04c7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp
@@ -82,7 +82,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ ArgumentDescriptor::Types::SCALE_TABLE, 0 });
kd.estimatedTime = estimated_time;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
index 057c4e655..ca6977952 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
@@ -24,8 +24,14 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
@@ -72,7 +78,7 @@ namespace kernel_selector
kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc(1, false, false);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp
index 13290c42d..9e5a9ad50 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp
@@ -129,7 +129,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
if(orgParams.poolType == PoolType::MAX_WITH_ARGMAX)
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp
new file mode 100644
index 000000000..5157b4d5e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp
@@ -0,0 +1,83 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h"
+
+namespace kernel_selector
+{
+ ParamsKey PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ k.EnablePoolType(PoolType::MAX);
+ k.EnablePoolType(PoolType::AVG);
+ k.EnablePoolRemainder(PoolRemainder::FLOOR);
+ k.EnablePoolRemainder(PoolRemainder::CEIL);
+ k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
+ k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC);
+ k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC_WITH_PADDING);
+ k.EnableDifferentTypes();
+ return k;
+ }
+
+ PoolingKernelBase::DispatchData PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::SetDefault(const pooling_params& params) const
+ {
+ constexpr int simdSize = 8;
+
+ DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+
+ runInfo.gws0 = params.output.X().v;
+ runInfo.gws1 = params.output.Y().v;
+ // The output is in fs_bs_yx_bsv4_fsv32 format; each work item processes 4 batches and 4 features.
+ runInfo.gws2 = (RoundUp(params.output.Feature().v, 32) * RoundUp(params.output.Batch().v, 4)) / (4 * 4);
+
+ runInfo.lws0 = 1;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = simdSize;
+
+ return runInfo;
+ }
+
+ JitConstants PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetJitConstants(const pooling_params& params, DispatchData kd) const
+ {
+ auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+
+ const size_t in_x_pitch = 32 * 4;
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+ return jit;
+ }
+
+ KernelsData PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_1);
+ }
+} \ No newline at end of file
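Note: the gws2 arithmetic packs a 4 (batch) x 4 (feature) tile into each work item. A worked example with a hypothetical output of 48 features and 6 batches:

    #include <cstddef>
    #include <iostream>

    static size_t RoundUp(size_t v, size_t m) { return ((v + m - 1) / m) * m; }

    int main() {
        const size_t features = 48, batch = 6;  // hypothetical output shape
        const size_t gws2 = (RoundUp(features, 32) * RoundUp(batch, 4)) / (4 * 4);
        std::cout << gws2 << '\n';  // (64 * 8) / 16 == 32 work items
        return 0;
    }

lws2 = 8 then runs those work items in the SIMD-8 subgroups the kernel expects (simdSize above).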
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h
new file mode 100644
index 000000000..efb5c67cd
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h
@@ -0,0 +1,36 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "pooling_kernel_base.h"
+
+namespace kernel_selector
+{
+ class PoolingKerneGPU_fs_bs_yx_bsv4_fsv32 : public PoolingKernelBase
+ {
+ public:
+ PoolingKerneGPU_fs_bs_yx_bsv4_fsv32() : PoolingKernelBase("pooling_gpu_fs_bs_yx_bsv4_fsv32") {}
+ virtual ~PoolingKerneGPU_fs_bs_yx_bsv4_fsv32() {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ DispatchData SetDefault(const pooling_params& params) const override;
+ protected:
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+
+ };
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
index 5a7d83b47..91ec4d2dc 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
@@ -22,6 +22,7 @@
#include "pooling_kernel_gpu_byxf_padding_opt.h"
#include "pooling_kernel_gpu_byxf_af32.h"
#include "pooling_kernel_gpu_int8_ref.h"
+#include "pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h"
namespace kernel_selector {
@@ -34,6 +35,7 @@ namespace kernel_selector {
Attach<PoolingKernelGPUByxfPaddingOpt>();
Attach<PoolingKernelGPUInt8Ref>();
Attach<PoolingKerneGPU_byxf_af32>();
+ Attach<PoolingKerneGPU_fs_bs_yx_bsv4_fsv32>();
}
KernelsData pooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp
index 62420b510..6e5577a84 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp
@@ -92,7 +92,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = FORCE_PRIORITY_9;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp
index 87ee70660..ba6f7ce1a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp
@@ -24,11 +24,15 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
index d050de529..867a3c8b9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
@@ -69,6 +69,14 @@ namespace kernel_selector
MakeJitConstant("OUTPUT", output),
};
+ if (fp16Supported)
+ {
+ jit.Merge(MakeUnitTypeJitConstants(Datatype::F16));
+ }
+ else
+ {
+ jit.Merge(MakeUnitTypeJitConstants(Datatype::F32));
+ }
return jit;
}
@@ -185,7 +193,7 @@ namespace kernel_selector
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments = GetArgsDesc(1, false, false);
@@ -215,7 +223,7 @@ namespace kernel_selector
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments = GetArgsDesc(1, false, false);
if (newParams.mode == MeanSubtractMode::IN_BUFFER)
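Note: the fp16 branch added to GetJitConstants makes reorder degrade gracefully on devices without half-precision support: when fp16Supported is false, the unit-type JIT constants are generated for F32 instead of F16. Assuming MakeUnitTypeJitConstants emits the usual UNIT_TYPE family of macros, the effect is roughly:

    // fp16Supported == true:   #define UNIT_TYPE half   (plus related macros)
    // fp16Supported == false:  #define UNIT_TYPE float  (plus related macros)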
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp
index 9ae8d10d9..0462e4a8f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp
@@ -87,7 +87,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = FORCE_PRIORITY_9;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
index 2984e2e8f..9c9c760ee 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
@@ -24,8 +24,14 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
@@ -60,7 +66,7 @@ namespace kernel_selector
kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc(1, false, false);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
index 0683beb3e..64dde2c32 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
@@ -1,5 +1,5 @@
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -27,6 +27,9 @@ namespace kernel_selector {
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::brfyx);
+ k.EnablePoolType(PoolType::MAX);
+ k.EnablePoolType(PoolType::AVG);
+ k.EnablePoolType(PoolType::BILINEAR);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
@@ -69,7 +72,6 @@ namespace kernel_selector {
});
jit.AddConstants({
- MakeJitConstant("MAX_POOL", rp.mode == PoolType::MAX),
MakeJitConstant("USE_OLD_SCALE_AND_ROUNDING", rp.groupSize == 0)
});
@@ -94,11 +96,11 @@ namespace kernel_selector {
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = FORCE_PRIORITY_9;
return{ kd };
}
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
index 471b61e6d..61edddabf 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
@@ -57,7 +57,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 2);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 2);
if (orgParams.use_momentum)
{
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp
index 4b9190f18..51b1122d4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp
@@ -24,7 +24,6 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp
new file mode 100644
index 000000000..09b3a0151
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp
@@ -0,0 +1,177 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "select_kernel_base.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+
+ bool SelectKernelBase::Validate(const Params& p, const optional_params& o) const
+ {
+ if (p.GetType() != KernelType::SELECT ||
+ o.GetType() != KernelType::SELECT)
+ {
+ return false;
+ }
+
+ const select_params& params = static_cast<const select_params&>(p);
+
+ if (params.inputs.size() != 3)
+ {
+ return false;
+ }
+
+ if (params.inputs[0].GetDType() != params.inputs[1].GetDType())
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ JitConstants SelectKernelBase::GetJitConstantsCommon(const select_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ std::string inputs_decls;
+
+ for (size_t i = 0; i < params.inputs.size(); i++)
+ {
+ std::string const_str = "const";
+
+ inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
+ }
+
+ jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
+
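+ // MASK is the jit-substituted expression that the generated OpenCL
+ // kernel evaluates on the third (condition) input; the cases below
+ // pick a form matching the element width of the data inputs.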
+ std::string destType, absType;
+
+ // i8, i8, i8
+ // i8, i8, u8
+ // u8, u8, i8
+ // u8, u8, u8
+ if ((params.inputs[2].GetDType() == Datatype::INT8
+ || params.inputs[2].GetDType() == Datatype::UINT8)
+ && (params.inputs[0].GetDType() == Datatype::INT8
+ || params.inputs[0].GetDType() == Datatype::UINT8))
+ {
+ jit.AddConstant(MakeJitConstant("MASK", "INPUT_2"));
+ }
+ else
+ {
+ // x, x, f32
+ // x, x, f16
+ if (params.inputs[2].GetDType() == Datatype::F32
+ || params.inputs[2].GetDType() == Datatype::F16)
+ {
+ absType = "fabs";
+ }
+ // f32, f32, i8
+ // f32, f32, u8
+ // f16, f16, i8
+ // f16, f16, u8
+ else
+ {
+ absType = "abs";
+ }
+
+ // f32, f32, x
+ if (params.inputs[0].GetDType() == Datatype::F32) {
+ destType = "int";
+ }
+ // f16, f16, x
+ else if (params.inputs[0].GetDType() == Datatype::F16) {
+ destType = "short";
+ }
+ // i8, i8, f32
+ // i8, i8, f16
+ // u8, u8, f32
+ // u8, u8, f16
+ else
+ {
+ destType = "char";
+ }
+
+ jit.AddConstant(MakeJitConstant("MASK", "convert_" + destType + "_rtp(" + absType + "(INPUT_2))"));
+ }
+
+ return jit;
+ }
+
+ JitConstants SelectKernelBase::GetJitConstants(const select_params& params) const
+ {
+ return GetJitConstantsCommon(params);
+ }
+
+ SelectKernelBase::DispatchData SelectKernelBase::SetDefault(const select_params& params) const
+ {
+ DispatchData kd;
+
+ const auto& out = params.output;
+
+ std::vector<size_t> gws;
+ for (const auto& o : out.GetDims())
+ {
+ gws.push_back(o.v);
+ }
+
+ for (size_t i = gws.size(); i < 4; i++)
+ {
+ gws.push_back(1U);
+ }
+
+ kd.gws0 = gws[0];
+ kd.gws1 = gws[1];
+ kd.gws2 = gws[2] * gws[3];
+
+ auto local = GetOptimalLocalWorkGroupSizes( { kd.gws0, kd.gws1, kd.gws2 } );
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const
+ {
+ if (!Validate(params, options))
+ {
+ return{};
+ }
+
+ KernelData kd = KernelData::Default<select_params>(params);
+ select_params& newParams = *static_cast<select_params*>(kd.params.get());
+
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ DispatchData runInfo = SetDefault(newParams);
+
+ auto& kernel = kd.kernels[0];
+
+ kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
+ kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
+
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+ kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
+
+ kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+ return{ kd };
+ }
+}
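
A standalone sketch of the MASK-expression dispatch implemented in
GetJitConstantsCommon above; the enum and the function name
select_mask_expr are illustrative, not part of the commit:

    #include <string>

    enum class Datatype { F16, F32, INT8, UINT8 };

    static bool IsByteType(Datatype t)
    {
        return t == Datatype::INT8 || t == Datatype::UINT8;
    }

    // data: type of inputs 0/1; mask: type of input 2 (the condition).
    std::string select_mask_expr(Datatype data, Datatype mask)
    {
        // Byte-sized data with a byte-sized mask can use the condition
        // input directly.
        if (IsByteType(mask) && IsByteType(data))
            return "INPUT_2";

        // Floating-point masks take fabs; integer masks take abs.
        const std::string absType =
            (mask == Datatype::F32 || mask == Datatype::F16) ? "fabs" : "abs";

        // The convert_<type>_rtp target matches the width of the data
        // inputs: int for f32, short for f16, char otherwise.
        std::string destType;
        if (data == Datatype::F32)      destType = "int";
        else if (data == Datatype::F16) destType = "short";
        else                            destType = "char";

        return "convert_" + destType + "_rtp(" + absType + "(INPUT_2))";
    }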
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h
new file mode 100644
index 000000000..c1d48d991
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h
@@ -0,0 +1,62 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // select_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct select_params : public base_params
+ {
+ select_params() : base_params(KernelType::SELECT) {}
+
+ virtual ParamsKey GetParamsKey() const
+ {
+ return base_params::GetParamsKey();
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // select_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct select_optional_params : optional_params
+ {
+ select_optional_params() : optional_params(KernelType::SELECT) {}
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // SelectKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class SelectKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+ virtual ~SelectKernelBase() {}
+
+ using DispatchData = CommonDispatchData;
+ JitConstants GetJitConstantsCommon(const select_params& params) const;
+
+ protected:
+ virtual bool Validate(const Params& p, const optional_params& o) const override;
+ virtual JitConstants GetJitConstants(const select_params& params) const;
+ virtual DispatchData SetDefault(const select_params& params) const;
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp
new file mode 100644
index 000000000..f7f776c15
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp
@@ -0,0 +1,64 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "select_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+ ParamsKey SelectKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::yxfb);
+ k.EnableInputLayout(DataLayout::byxf);
+
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::byxf);
+
+ k.EnableBatching();
+ k.EnableDifferentTypes();
+
+ return k;
+ }
+
+ bool SelectKernelRef::Validate(const Params& p, const optional_params& o) const
+ {
+ if (!SelectKernelBase::Validate(p, o))
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ KernelsData SelectKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options);
+ }
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h
new file mode 100644
index 000000000..a72c0e90a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "select_kernel_base.h"
+
+namespace kernel_selector
+{
+ class SelectKernelRef : public SelectKernelBase
+ {
+ public:
+ SelectKernelRef() : SelectKernelBase("select_gpu_ref") {}
+ virtual ~SelectKernelRef() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ protected:
+ bool Validate(const Params& p, const optional_params& o) const override;
+
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp
new file mode 100644
index 000000000..ec1218166
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "select_kernel_selector.h"
+#include "select_kernel_ref.h"
+
+namespace kernel_selector
+{
+ select_kernel_selector::select_kernel_selector()
+ {
+ Attach<SelectKernelRef>();
+ }
+
+ KernelsData select_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::SELECT);
+ }
+}
\ No newline at end of file
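
An illustrative usage sketch (not part of the commit) of the new
selector; it assumes select_params and select_optional_params are
populated the way other clDNN primitives fill their base_params:

    #include "select_kernel_selector.h"

    kernel_selector::KernelsData get_select_kernels(
        const kernel_selector::select_params& params,
        const kernel_selector::select_optional_params& options)
    {
        // Instance() returns the process-wide singleton; GetBestKernels
        // walks the attached implementations (currently only
        // SelectKernelRef) and keeps those whose ParamsKey matches.
        auto& selector = kernel_selector::select_kernel_selector::Instance();
        return selector.GetBestKernels(params, options);
    }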
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h
new file mode 100644
index 000000000..b3de11649
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+ class select_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static select_kernel_selector &Instance() {
+ static select_kernel_selector instance_;
+ return instance_;
+ }
+
+ select_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
index 4f02da734..4d2c36d39 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
@@ -88,7 +88,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp
index 0a4473714..da816abac 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp
@@ -74,7 +74,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp
new file mode 100644
index 000000000..37c206d3b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp
@@ -0,0 +1,153 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "tile_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+ static int32_t GetTileChannelIndex(const tile_params& params)
+ {
+ Tensor::DataChannelName name = Tensor::DataChannelName::X;
+ switch (params.axis)
+ {
+ case TileAxis::X: name = Tensor::DataChannelName::X; break;
+ case TileAxis::Y: name = Tensor::DataChannelName::Y; break;
+ case TileAxis::FEATURE: name = Tensor::DataChannelName::FEATURE; break;
+ case TileAxis::BATCH: name = Tensor::DataChannelName::BATCH; break;
+ default: break;
+ }
+
+ return DataTensor::Channelndex(params.output.GetLayout(), name);
+ }
+
+ ParamsKey TileKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ return k;
+ }
+
+ CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const optional_params&) const
+ {
+ CommonDispatchData runInfo;
+
+ auto in = params.inputs[0];
+
+ size_t inner_size = 1;
+ size_t outer_size = 1;
+
+ const int32_t axis = GetTileChannelIndex(params);
+
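+ // Dims up to and including the tiled axis form the contiguous block
+ // that gets replicated; dims above the axis count how many such
+ // blocks the input contains.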
+ for (int32_t i = 0; i <= axis; i++)
+ {
+ inner_size *= in.GetDims()[i].v;
+ }
+
+ for (int32_t i = axis+1; i < static_cast<int32_t>(in.GetDims().size()); i++)
+ {
+ outer_size *= in.GetDims()[i].v;
+ }
+
+ if (inner_size > 1)
+ {
+ runInfo.gws0 = outer_size;
+ runInfo.gws1 = inner_size;
+ runInfo.gws2 = 1;
+
+ runInfo.lws0 = 1;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = 1;
+ }
+ else
+ {
+ runInfo.gws0 = Align(outer_size, 16);
+ runInfo.gws1 = 1;
+ runInfo.gws2 = 1;
+
+ runInfo.lws0 = 16;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = 1;
+ }
+
+ runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+ return runInfo;
+ }
+
+ JitConstants TileKernelRef::GetJitConstants(const tile_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ auto in = params.inputs[0];
+ auto out = params.output;
+
+ size_t inner_size = 1;
+ size_t outer_size = 1;
+ size_t axis_pitch = 1;
+
+ const int32_t axis = GetTileChannelIndex(params);
+
+ for (int32_t i = 0; i <= axis; i++)
+ {
+ inner_size *= in.GetDims()[i].v;
+ axis_pitch *= in.GetDims()[i].LogicalDimPadded();
+ }
+ for (int32_t i = axis+1; i < static_cast<int32_t>(in.GetDims().size()); i++)
+ {
+ outer_size *= in.GetDims()[i].v;
+ }
+
+ jit.AddConstant(MakeJitConstant("TILES", params.tiles));
+ jit.AddConstant(MakeJitConstant("AXIS_PITCH", axis_pitch));
+ jit.AddConstant(MakeJitConstant("OUTER_SIZE", outer_size));
+ if (inner_size == 1)
+ {
+ jit.AddConstant(MakeJitConstant("OUTPUT_SIZE", out.LogicalSize()));
+ jit.AddConstant(MakeJitConstant("DENSE", 1));
+ }
+ return jit;
+ }
+
+ KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ assert(params.GetType() == KernelType::TILE);
+
+ KernelData kd = KernelData::Default<tile_params>(params);
+ tile_params& newParams = *static_cast<tile_params*>(kd.params.get());
+
+ auto runInfo = SetDefault(newParams, options);
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = kd.kernels[0];
+
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+
+ kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+ return{ kd };
+ }
+}
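
A worked example of the inner/outer split computed by SetDefault and
GetJitConstants above; the helper split_sizes and the concrete shape
are illustrative only:

    #include <cstddef>
    #include <vector>

    // dims are given lowest-stride first ({x, y, f, b} for bfyx),
    // matching the order GetDims() returns.
    static void split_sizes(const std::vector<size_t>& dims, int axis,
                            size_t& inner, size_t& outer)
    {
        inner = outer = 1;
        for (int i = 0; i <= axis; i++)
            inner *= dims[i];
        for (int i = axis + 1; i < static_cast<int>(dims.size()); i++)
            outer *= dims[i];
    }

    // For a bfyx input with b=2, f=3, y=4, x=5 tiled along FEATURE
    // (channel index 2 in bfyx), split_sizes({5, 4, 3, 2}, 2, ...)
    // yields inner = 60 and outer = 2, so SetDefault dispatches
    // gws = {2, 60, 1}; the DENSE fast path is taken only when the
    // tiled axis and every dim below it collapse to a single element.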
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
new file mode 100644
index 000000000..967dab817
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
@@ -0,0 +1,58 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // tile_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct tile_params : public base_params
+ {
+ tile_params() : base_params(KernelType::TILE) {}
+
+ TileAxis axis;
+ int tiles;
+
+ virtual ParamsKey GetParamsKey() const
+ {
+ return base_params::GetParamsKey();
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // tile_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct tile_optional_params : optional_params
+ {
+ tile_optional_params() : optional_params(KernelType::TILE) {}
+ };
+
+ class TileKernelRef : public common_kernel_base
+ {
+ public:
+ TileKernelRef() : common_kernel_base("tile_ref") {}
+ virtual ~TileKernelRef() {}
+
+ virtual JitConstants GetJitConstants(const tile_params& params) const;
+ virtual CommonDispatchData SetDefault(const tile_params& params, const optional_params&) const;
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp
new file mode 100644
index 000000000..c0ca49de7
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "tile_kernel_selector.h"
+#include "tile_kernel_ref.h"
+
+namespace kernel_selector {
+
+ tile_kernel_selector::tile_kernel_selector()
+ {
+ Attach<TileKernelRef>();
+ }
+
+ KernelsData tile_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::TILE);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h
new file mode 100644
index 000000000..c0b10fa10
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+ class tile_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static tile_kernel_selector &Instance() {
+ static tile_kernel_selector instance_;
+ return instance_;
+ }
+
+ tile_kernel_selector();
+
+ virtual ~tile_kernel_selector() {}
+
+ virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
index ea0d89515..889daf8bc 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
@@ -76,7 +76,7 @@ namespace kernel_selector
kernel.workGroups.global = { out.X().v, out.Y().v, out.Feature().v * out.Batch().v };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp
index 46a927483..9037ebc0a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp
@@ -15,15 +15,16 @@
*/
#include "upsampling_kernel_ref.h"
-#include "kernel_selector_utils.h"
namespace kernel_selector {
ParamsKey UpSamplingKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();