Diffstat (limited to 'inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels')
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp  78
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h  72
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp  51
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h  34
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp  71
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h  60
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp  51
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h  34
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp  17
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp  22
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp  16
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp  100
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h  39
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp  97
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h  43
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp  6
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp  3
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp  75
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h  33
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp  73
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h  33
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp  5
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp  9
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp  110
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h  41
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp  222
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h  35
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp  31
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h  3
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp  19
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp  6
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp  117
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h  38
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp  98
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h  69
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp  41
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp  31
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h  35
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp  86
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h  61
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp  58
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp  30
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h  34
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp  6
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp  3
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp  6
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp  8
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp  83
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h  36
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp  4
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp  12
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp  8
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp  10
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp  1
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp  177
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h  62
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp  64
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h  35
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp  31
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h  35
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp  153
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h  58
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp  31
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h  37
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp  2
-rw-r--r--  inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp  7
108 files changed, 3167 insertions, 144 deletions
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
index 7feeb60d6..358b66d1c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
@@ -96,7 +96,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
if (newParams.gradient)
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp
index a69ce07c6..894101bff 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp
@@ -75,7 +75,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = FORCE_PRIORITY_9;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp
index 455b249cc..a51824c52 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp
@@ -78,7 +78,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp
index 70af969b3..8e086dae6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp
@@ -78,7 +78,7 @@ namespace kernel_selector
runInfo.lws1 = 1;
runInfo.lws2 = 1;
- FillCLKernelData(kernel, runInfo, kernelName, jit, entryPoint);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
size = (size / 128 + 1) * topK;
}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp
index 46e8e85d8..c94e624bd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp
@@ -95,7 +95,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
index 811d4412f..ebf881f50 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
@@ -80,7 +80,7 @@ namespace kernel_selector
auto& kernel = kd.kernels[0];
int inputs_num = 1 + orgParams.batchNormParams.with_inv_var;
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, inputs_num);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, inputs_num);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp
index c5b15712a..25d9115dd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp
@@ -70,7 +70,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 3);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp
new file mode 100644
index 000000000..3346c4b0a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp
@@ -0,0 +1,78 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "border_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+ JitConstants BorderKernelBase::GetJitConstants(const border_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ jit.AddConstants({
+ MakeJitConstant("LT_SIZES", params.lt_sizes),
+ MakeJitConstant("RB_SIZES", params.rb_sizes),
+ MakeJitConstant(toString(params.b_type), "")
+ });
+
+ return jit;
+ }
+
+ BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params& params) const
+ {
+ const auto& output = params.output;
+
+ DispatchData kd;
+
+ kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+ std::vector<size_t> global{output.X().v, output.Y().v, output.Batch().v * output.Feature().v};
+ const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
+
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+ {
+ assert(params.GetType() == KernelType::BORDER);
+
+ const auto& prim_params = static_cast<const border_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+
+ auto run_info = SetDefault(prim_params);
+ KernelData k_data = KernelData::Default<border_params>(params);
+
+ auto cldnn_jit = GetJitConstants(prim_params);
+ auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+ auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = k_data.kernels[0];
+ FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+
+ k_data.estimatedTime = estimated_time;
+
+ return {k_data};
+ }
+}
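
Note: SetDefault above derives the ND-range from the output tensor, with a global work size of {X, Y, Batch * Feature} and local sizes taken from GetOptimalLocalWorkGroupSizes. The following is a minimal standalone sketch of that shape of logic, assuming a simple largest-divisor heuristic with a 256-item work-group cap; the real clDNN helper may choose differently.

#include <cstddef>
#include <vector>

// Sketch only: per dimension, pick the largest divisor of the global size
// that still fits the remaining work-group budget (assumed cap of 256).
static size_t largest_divisor_up_to(size_t n, size_t cap)
{
    for (size_t d = cap; d > 1; --d)
        if (n % d == 0)
            return d;
    return 1;
}

std::vector<size_t> sketch_optimal_lws(const std::vector<size_t>& global)
{
    std::vector<size_t> local;
    size_t budget = 256;                  // typical device max work-group size
    for (size_t g : global)
    {
        const size_t l = largest_divisor_up_to(g, budget);
        local.push_back(l);
        budget /= l;                      // keep lws0 * lws1 * lws2 <= 256
    }
    return local;
}

// e.g. a 224x224 output with batch 1 and 64 features gives
// global = {224, 224, 64}; this sketch then yields local = {224, 1, 1}.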
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h
new file mode 100644
index 000000000..43c10c715
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h
@@ -0,0 +1,72 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // border_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct border_params : public base_params
+ {
+ DimTensor<> lt_sizes;
+ DimTensor<> rb_sizes;
+ BorderType b_type = BorderType::ZERO;
+
+
+ border_params()
+ : base_params(KernelType::BORDER)
+ {
+ }
+
+ ParamsKey GetParamsKey() const override
+ {
+ ParamsKey k = base_params::GetParamsKey();
+ // k.EnableBorderType(b_type);
+ return k;
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // border_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct border_optional_params : optional_params
+ {
+ border_optional_params()
+ : optional_params(KernelType::BORDER)
+ {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // BorderKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class BorderKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+
+ using DispatchData = CommonDispatchData;
+
+ protected:
+ JitConstants GetJitConstants(const border_params& params) const;
+ DispatchData SetDefault(const border_params& params) const;
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
new file mode 100644
index 000000000..9029d7afc
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
@@ -0,0 +1,51 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "border_kernel_ref.h"
+
+
+namespace kernel_selector
+{
+ ParamsKey BorderKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::yxfb);
+ k.EnableInputLayout(DataLayout::byxf);
+
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::byxf);
+
+ k.EnableBatching();
+
+ return k;
+ }
+
+ KernelsData BorderKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h
new file mode 100644
index 000000000..0862ed144
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "border_kernel_base.h"
+
+
+namespace kernel_selector
+{
+ class BorderKernelRef : public BorderKernelBase
+ {
+ public:
+ BorderKernelRef() : BorderKernelBase("border_gpu_ref") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp
new file mode 100644
index 000000000..42e352c6a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "border_kernel_selector.h"
+#include "border_kernel_ref.h"
+
+namespace kernel_selector
+{
+ border_kernel_selector::border_kernel_selector()
+ {
+ Attach<BorderKernelRef>();
+ }
+
+ KernelsData border_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::BORDER);
+ }
+}
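
Note: a hypothetical call site for the new selector (the function and variable names below are illustrative; border_params and border_optional_params come from border_kernel_base.h above).

#include "border_kernel_selector.h"

kernel_selector::KernelsData pick_border_kernels(
    const kernel_selector::border_params& params,
    const kernel_selector::border_optional_params& options)
{
    // Instance() is the singleton declared in border_kernel_selector.h;
    // GetBestKernels routes through GetNaiveBestKernel(KernelType::BORDER),
    // which tries the one attached BorderKernelRef.
    return kernel_selector::border_kernel_selector::Instance()
        .GetBestKernels(params, options);
}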
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h
new file mode 100644
index 000000000..515a9a497
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+
+namespace kernel_selector
+{
+ class border_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static border_kernel_selector &Instance() {
+ static border_kernel_selector instance;
+ return instance;
+ }
+
+ border_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp
new file mode 100644
index 000000000..3d3b2f4d3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp
@@ -0,0 +1,71 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "broadcast_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+ JitConstants BroadcastKernelBase::GetJitConstants(const broadcast_params& params)
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+ return jit;
+ }
+
+ BroadcastKernelBase::DispatchData BroadcastKernelBase::SetDefault(const broadcast_params& params)
+ {
+ const auto& output = params.output;
+
+ DispatchData kd;
+
+ kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+ std::vector<size_t> global{output.X().v, output.Y().v, output.Batch().v * output.Feature().v};
+ const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
+
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+ {
+ assert(params.GetType() == KernelType::BROADCAST);
+
+ const auto& prim_params = static_cast<const broadcast_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+
+ auto run_info = SetDefault(prim_params);
+ KernelData k_data = KernelData::Default<broadcast_params>(params);
+
+ auto cldnn_jit = GetJitConstants(prim_params);
+ auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+ auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = k_data.kernels[0];
+ FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+
+ k_data.estimatedTime = estimated_time;
+
+ return {k_data};
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h
new file mode 100644
index 000000000..cf4865e80
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h
@@ -0,0 +1,60 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // broadcast_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct broadcast_params : public base_params
+ {
+ broadcast_params()
+ : base_params(KernelType::BROADCAST)
+ {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // broadcast_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct broadcast_optional_params : optional_params
+ {
+ broadcast_optional_params()
+ : optional_params(KernelType::BROADCAST)
+ {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // BroadcastKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class BroadcastKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+
+ using DispatchData = CommonDispatchData;
+
+ protected:
+ static JitConstants GetJitConstants(const broadcast_params& params);
+ static DispatchData SetDefault(const broadcast_params& params);
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp
new file mode 100644
index 000000000..0be42a5e2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp
@@ -0,0 +1,51 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "broadcast_kernel_ref.h"
+
+
+namespace kernel_selector
+{
+ ParamsKey BroadcastKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::yxfb);
+ k.EnableInputLayout(DataLayout::byxf);
+
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::byxf);
+
+ k.EnableBatching();
+
+ return k;
+ }
+
+ KernelsData BroadcastKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h
new file mode 100644
index 000000000..ccca397ab
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "broadcast_kernel_base.h"
+
+
+namespace kernel_selector
+{
+ class BroadcastKernelRef : public BroadcastKernelBase
+ {
+ public:
+ BroadcastKernelRef() : BroadcastKernelBase("broadcast_gpu_ref") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp
new file mode 100644
index 000000000..02ae904d2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "broadcast_kernel_selector.h"
+#include "broadcast_kernel_ref.h"
+
+namespace kernel_selector
+{
+ broadcast_kernel_selector::broadcast_kernel_selector()
+ {
+ Attach<BroadcastKernelRef>();
+ }
+
+ KernelsData broadcast_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::BROADCAST);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h
new file mode 100644
index 000000000..ec7f4da55
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+
+namespace kernel_selector
+{
+ class broadcast_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static broadcast_kernel_selector &Instance() {
+ static broadcast_kernel_selector instance;
+ return instance;
+ }
+
+ broadcast_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp
index b81cfe879..f9df941ad 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp
@@ -71,9 +71,18 @@ namespace kernel_selector
// Determine global work sizes.
if (params.inputs[0].GetLayout() != params.output.GetLayout())
{
- kd.gws0 = dims.size() < 2 ? 1 : dims[2].v;
- kd.gws1 = dims.size() < 3 ? 1 : dims[1].v;
- kd.gws2 = dims.size() < 4 ? 1 : dims[0].v;
+ if (params.inputs[0].GetLayout() == kernel_selector::Tensor::DataLayout::yxfb)
+ {
+ kd.gws0 = dims.size() < 2 ? 1 : dims[3].v;
+ kd.gws1 = dims.size() < 3 ? 1 : dims[1].v;
+ kd.gws2 = dims.size() < 4 ? 1 : dims[0].v;
+ }
+ else
+ {
+ kd.gws0 = dims.size() < 2 ? 1 : dims[2].v;
+ kd.gws1 = dims.size() < 3 ? 1 : dims[1].v;
+ kd.gws2 = dims.size() < 4 ? 1 : dims[0].v;
+ }
}
else
{
@@ -124,7 +133,7 @@ namespace kernel_selector
kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
- kernel.kernelString = GetKernelString(kernelName, jit, entryPoint);
+ kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, (uint32_t)i });
kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp
index 5520f427a..c5c6ae53a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp
@@ -26,9 +26,13 @@ namespace kernel_selector
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
@@ -60,10 +64,20 @@ namespace kernel_selector
//case for input == bfyx, output == yxfb and input == yxfb, output == bfyx
if (input_format != output_format)
{
- dim_index[0] = 3;
- dim_index[1] = 2;
- dim_index[2] = 0;
- dim_index[3] = 1;
+ if (input_format == kernel_selector::Tensor::DataLayout::yxfb)
+ {
+ dim_index[0] = 2;
+ dim_index[1] = 3;
+ dim_index[2] = 1;
+ dim_index[3] = 0;
+ }
+ else
+ {
+ dim_index[0] = 3;
+ dim_index[1] = 2;
+ dim_index[2] = 0;
+ dim_index[3] = 1;
+ }
}
cldnnJit.AddConstant(MakeJitConstant("INPUT_DIM_0", dim_index[0]));
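
Note on reading the remap above (assumed semantics, for illustration only): with the dims vector ordered x, y, f, b, each INPUT_DIM_i constant names the logical input dimension that feeds the kernel's dimension i when the input and output layouts differ. The pre-existing table {3, 2, 0, 1} is kept for the bfyx-input case, and the new yxfb-input branch supplies {2, 3, 1, 0} instead, so the same reference kernel covers both directions of the layout mismatch.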
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
index 878a02048..86bfe937c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
@@ -241,7 +241,7 @@ namespace kernel_selector
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, finalKernelName, jit, entryPoint, exeMode, true, !newParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entryPoint, exeMode, true, !newParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
index ad56556bc..b92df30b7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
@@ -1,5 +1,5 @@
/*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ namespace kernel_selector
ConvolutionKernel_bfyx_3x3_dw_opt::ConvolutionKernel_bfyx_3x3_dw_opt() : ConvolutionKernelBase("convolution_gpu_bfyx_3x3_dw_opt")
{
// Generate the dispatch options to the auto-tuner.
- std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14,16 };
+ std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14 };
std::vector<size_t> tileYDimSizes = { 1,2,3,4,5,6,7 };
std::vector<std::string> executionModes = { /*AGE_BASED ,*/ ROUND_ROBIN };
@@ -141,6 +141,18 @@ namespace kernel_selector
KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const
{
+ constexpr int simdSize = 16;
+
+ KernelData kd = KernelData::Default<convolution_params>(params);
+ convolution_params& convParams = *static_cast<convolution_params*>(kd.params.get());
+ DispatchData runInfo = SetDefault(convParams, autoTuneIndex);
+
+ if (static_cast<int>(static_cast<int>(runInfo.gws0 - 1) / simdSize) * runInfo.cldnnStyle.blockWidth + simdSize > convParams.inputs[0].Y().pitch)
+ {
+ // Internal Error - requested tile size is not supported for y pitch
+ return{};
+ }
+
return GetCommonKernelsData(params, options, GetAutoTuneOptions(params, autoTuneIndex).exeMode, autoTuneIndex);
}
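
Note: a worked instance of the new early-out above (all numbers are illustrative, not taken from the patch).

constexpr int simdSize   = 16;
constexpr int gws0       = 64;   // assumed dispatch width for this tune index
constexpr int blockWidth = 14;   // assumed tile width from cldnnStyle
constexpr int needed     = ((gws0 - 1) / simdSize) * blockWidth + simdSize;
// needed == 3 * 14 + 16 == 58: if the input's Y pitch is below 58 elements,
// GetTunedKernelsDataByIndex returns an empty KernelsData and the auto-tuner
// moves on to the next configuration.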
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp
new file mode 100644
index 000000000..f6841db94
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp
@@ -0,0 +1,100 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+ ParamsKey ConvolutionKernel_bfyx_depthwise_weights_lwg::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputWeightsType(WeightsType::F16);
+ k.EnableInputWeightsType(WeightsType::F32);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableBatching();
+ k.EnableSplitSupport();
+ k.EnableSubGroup();
+ k.EnableSubGroupShort();
+ k.EnableDepthwiseSeparableOpt();
+ return k;
+ }
+
+ bool ConvolutionKernel_bfyx_depthwise_weights_lwg::Validate(const Params& p, const optional_params& o) const
+ {
+ if (!ConvolutionKernelBase::Validate(p, o) ||
+ !CovolutionCheckInput(p, o))
+ {
+ return false;
+ }
+
+ const convolution_params& cp = static_cast<const convolution_params&>(p);
+ if (!cp.depthwiseSeparableOpt)
+ return false;
+
+ if ((cp.filterSize.x > 4) ||
+ (cp.filterSize.y > 4) ||
+ (cp.inputs[0].Feature().v != cp.split))
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_depthwise_weights_lwg::SetDefault(const convolution_params& params, int) const
+ {
+ DispatchData runInfo = Parent::SetDefault(params);
+ const auto& out = params.output;
+
+ std::vector<size_t> global = { out.X().v * out.Y().v, out.Feature().v, out.Batch().v };
+
+ runInfo.gws0 = Align(global[0], 16);
+ runInfo.gws1 = global[1];
+ runInfo.gws2 = global[2];
+ runInfo.lws0 = 16;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = 1;
+
+ runInfo.effiency = FORCE_PRIORITY_6;
+
+ return runInfo;
+ }
+
+ JitConstants ConvolutionKernel_bfyx_depthwise_weights_lwg::GetJitConstants(const convolution_params& params, const DispatchData& kd) const
+ {
+ auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd);
+
+ if(params.padding.x != 0 || params.padding.y != 0)
+ mem_consts.AddConstant(MakeJitConstant("BOUNDARY_CHECK", 1));
+
+ return mem_consts;
+ }
+
+ KernelsData ConvolutionKernel_bfyx_depthwise_weights_lwg::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options);
+ }
+}
\ No newline at end of file
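
Note: a worked instance of the dispatch in SetDefault above (the shape is illustrative).

#include <cstddef>

// For an output of X=28, Y=28, F=32, B=1: global[0] = 28 * 28 = 784 and
// Align(784, 16) = 784 (49 full sub-groups), so the kernel launches with
// gws = {784, 32, 1} and a fixed work-group of {16, 1, 1}.
constexpr std::size_t gws0 = ((28 * 28 + 15) / 16) * 16;
static_assert(gws0 == 784, "28*28 is already 16-aligned");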
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h
new file mode 100644
index 000000000..b578f8fd5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h
@@ -0,0 +1,39 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector
+{
+ class ConvolutionKernel_bfyx_depthwise_weights_lwg : public ConvolutionKernelBase
+ {
+ public:
+ using Parent = ConvolutionKernelBase;
+ ConvolutionKernel_bfyx_depthwise_weights_lwg() : ConvolutionKernelBase("convolution_gpu_bfyx_depthwise_weights_lwg") {}
+ virtual ~ConvolutionKernel_bfyx_depthwise_weights_lwg() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+
+ protected:
+ bool Validate(const Params&, const optional_params&) const override;
+ std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::oiyx }; }
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp
new file mode 100644
index 000000000..ce73392ac
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp
@@ -0,0 +1,97 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_batched.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+ ParamsKey ConvolutionKernel_mmad_batched::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableDilation();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableBatching();
+ k.EnableSplitSupport();
+ k.EnableInt8Quantization();
+ k.EnableOutputCalibration();
+ k.DisableTuning();
+ return k;
+ }
+
+ ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_batched::SetDefault(const convolution_params& arg, int) const
+ {
+ DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+ constexpr size_t sub_group_size = 8;
+
+ const auto of_maps = arg.output.Feature().v;
+ const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
+
+ runInfo.effiency = FORCE_PRIORITY_3;
+
+ runInfo.gws0 = arg.output.X().v;
+ runInfo.gws1 = arg.output.Y().v;
+ runInfo.gws2 = of_threads_per_batch * ((arg.output.Batch().v+3) / 4);
+
+ runInfo.lws0 = 1;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = sub_group_size;
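+        // A sketch of the dispatch layout, derived from the assignments above:
+        // each XY position spawns one thread per output feature, rounded up to the
+        // sub-group size of 8, and batches are processed in quads, e.g. for
+        // OFM=24 and batch=8: gws2 = RoundUp(24, 8) * ((8 + 3) / 4) = 24 * 2 = 48.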
+
+ return runInfo;
+ }
+
+ JitConstants ConvolutionKernel_mmad_batched::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+ {
+ auto jit = Parent::GetJitConstants(params, runInfo);
+
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+
+ // pitch for special block format used in this kernel
+ const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
+ const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
+ jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
+
+ const size_t in_x_pitch = 32 * 4;
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
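+        // Worked example of the fs_bs_yx_bsv4_fsv32 pitch arithmetic above, assuming
+        // an unpadded 2x2 input with batch=4: each block holds 4 batches x 32 features,
+        // so in_x_pitch = 128, in_y_pitch = 128 * 2 = 256, in_b_block_pitch = 256 * 2
+        // = 512, in_f_block_pitch = 512 * ((4 + 3) / 4) = 512, and in_offset = 0.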
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+ return jit;
+ }
+
+ KernelsData ConvolutionKernel_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ KernelsData kd = GetCommonKernelsData(params, options);
+ if(!kd.empty())
+ kd[0].estimatedTime = FORCE_PRIORITY_3;
+ return kd;
+ }
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h
new file mode 100644
index 000000000..8a3dda451
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h
@@ -0,0 +1,43 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+ class ConvolutionKernel_mmad_batched : public ConvolutionKernelBase
+ {
+ public:
+ using Parent = ConvolutionKernelBase;
+ ConvolutionKernel_mmad_batched() : ConvolutionKernelBase("convolution_gpu_mmad_batched") {}
+ virtual ~ConvolutionKernel_mmad_batched() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+
+ protected:
+ JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+ DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+ virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+ {
+ return{
+ WeightsLayout::os_is_yx_isa8_osv8_isv4,
+ };
+ }
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
index 21d9c92ff..aa5850593 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
@@ -1,5 +1,5 @@
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -35,6 +35,8 @@
#include "convolution_kernel_MMAD_blocks.h"
#include "convolution_kernel_1x1_gemm_MMAD.h"
#include "convolution_kernel_byxf_af32_depthwise.h"
+#include "convolution_kernel_mmad_batched.h"
+#include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
#include <iostream>
@@ -61,6 +63,8 @@ namespace kernel_selector
Attach<ConvolutionKernel_MMAD_blocks>();
Attach<ConvolutionKernel_1x1_gemm_MMAD>();
Attach<ConvolutionKernel_byxf_af32_depthiwise>();
+ Attach<ConvolutionKernel_mmad_batched>();
+ Attach<ConvolutionKernel_bfyx_depthwise_weights_lwg>();
//Attach<ConvolutionKernel_Tutorial>(); //In order to use this implementation for tutorial purposes please uncomment this line
}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h
index 9275a5fcb..6b4c756d0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h
@@ -17,7 +17,6 @@
#pragma once
#include "kernel_selector.h"
-#include "kernel_runner_interface.h"
namespace kernel_selector
{
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp
index b6065edc6..13f8ba40b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp
@@ -22,11 +22,8 @@ namespace kernel_selector
ParamsKey ConvolutionGradWeightsKernel1x1::GetSupportedKey() const
{
ParamsKey k;
- k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
- k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp
new file mode 100644
index 000000000..c892f6d2d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp
@@ -0,0 +1,75 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_grad_weights_kernel_3x3.h"
+
+namespace kernel_selector
+{
+
+ ParamsKey ConvolutionGradWeightsKernel3x3::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputWeightsType(WeightsType::F32);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::byxf);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableMomentum();
+ k.EnableBatching();
+ k.EnableSplitSupport();
+ k.EnableGradient();
+ k.DisableTuning();
+ return k;
+ }
+
+ bool ConvolutionGradWeightsKernel3x3::Validate(const Params& p, const optional_params&) const
+ {
+ const auto& params = static_cast<const convolution_grad_weights_params&>(p);
+
+ if (params.stride.x != 1 || params.stride.y != 1)
+ return false;
+ if (params.filterSize.x != 3 || params.filterSize.y != 3)
+ return false;
+ return true;
+ }
+
+ ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel3x3::SetDefault(const convolution_grad_weights_params& params) const
+ {
+ auto input_features = params.weights.IFM().v;
+ auto output_features = params.weights.OFM().v;
+
+ DispatchData kd;
+
+ kd.gws0 = Align(output_features, 16);
+ kd.gws1 = input_features;
+ kd.gws2 = 1;
+ kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (kd.gws0 % kd.lws0 != 0)
+ {
+ kd.lws0 -= 16;
+ }
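+        // gws0 is aligned to 16 above, so this loop always terminates at a divisor
+        // of gws0 (32 when gws0 is a multiple of 32, otherwise 16).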
+ kd.lws1 = 1;
+ kd.lws2 = 1;
+ kd.effiency = FORCE_PRIORITY_8;
+ return kd;
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h
new file mode 100644
index 000000000..39fcb7e96
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h
@@ -0,0 +1,33 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_grad_weights_kernel_base.h"
+
+namespace kernel_selector {
+
+ class ConvolutionGradWeightsKernel3x3 : public ConvolutionGradWeightsKernelBase
+ {
+ public:
+ ConvolutionGradWeightsKernel3x3() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_3x3") {}
+ virtual ~ConvolutionGradWeightsKernel3x3() {}
+
+ virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
+ virtual bool Validate(const Params& p, const optional_params& o) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp
new file mode 100644
index 000000000..8bd5000d6
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp
@@ -0,0 +1,73 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_grad_weights_kernel_7x7.h"
+
+namespace kernel_selector
+{
+
+ ParamsKey ConvolutionGradWeightsKernel7x7::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputWeightsType(WeightsType::F32);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::byxf);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableMomentum();
+ k.EnableBatching();
+ k.EnableSplitSupport();
+ k.EnableGradient();
+ k.DisableTuning();
+ return k;
+ }
+
+ bool ConvolutionGradWeightsKernel7x7::Validate(const Params& p, const optional_params&) const
+ {
+ const auto& params = static_cast<const convolution_grad_weights_params&>(p);
+
+ if (params.filterSize.x != 7 || params.filterSize.y != 7)
+ return false;
+ return true;
+ }
+
+ ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel7x7::SetDefault(const convolution_grad_weights_params& params) const
+ {
+ auto input_features = params.weights.IFM().v;
+ auto output_features = params.weights.OFM().v;
+
+ DispatchData kd;
+
+ kd.gws0 = 8;
+ kd.gws1 = Align(output_features, 16);
+ kd.gws2 = input_features;
+ kd.lws0 = 1;
+ kd.lws1 = std::min(std::max(kd.gws1, static_cast<size_t>(1)), static_cast<size_t>(32));
+ while (kd.gws1 % kd.lws1 != 0)
+ {
+ kd.lws1 -= 16;
+ }
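+        // As in the 3x3 variant, gws1 is 16-aligned, so lws1 settles on 16 or 32.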
+ kd.lws2 = 1;
+ kd.effiency = FORCE_PRIORITY_8;
+ return kd;
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h
new file mode 100644
index 000000000..286caf5c0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h
@@ -0,0 +1,33 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_grad_weights_kernel_base.h"
+
+namespace kernel_selector {
+
+ class ConvolutionGradWeightsKernel7x7 : public ConvolutionGradWeightsKernelBase
+ {
+ public:
+ ConvolutionGradWeightsKernel7x7() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_7x7") {}
+ virtual ~ConvolutionGradWeightsKernel7x7() {}
+
+ virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
+ virtual bool Validate(const Params& p, const optional_params& o) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
index 2998075b7..1e2cd30a8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
@@ -124,7 +124,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
if (newParams.use_momentum)
{
kernel.arguments.push_back({ ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp
index 7ca7c9a39..3c29a7616 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp
@@ -24,7 +24,6 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp
index 16b842e9e..fb045a273 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp
@@ -18,6 +18,8 @@
#include "convolution_grad_weights_kernel_ref.h"
#include "convolution_grad_weights_kernel_1x1.h"
#include "convolution_grad_weights_kernel_yxfb.h"
+#include "convolution_grad_weights_kernel_3x3.h"
+#include "convolution_grad_weights_kernel_7x7.h"
namespace kernel_selector
{
@@ -26,6 +28,8 @@ namespace kernel_selector
Attach<ConvolutionGradWeightsKernelRef>();
Attach<ConvolutionGradWeightsKernel1x1>();
Attach<ConvolutionGradWeightsKernel_yxfb>();
+ Attach<ConvolutionGradWeightsKernel3x3>();
+ Attach<ConvolutionGradWeightsKernel7x7>();
}
KernelsData convolution_grad_weights_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp
index e6fb6d026..f53d51b39 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp
@@ -24,7 +24,6 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
k.EnableInputWeightsType(WeightsType::F32);
- k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::yxfb);
@@ -64,11 +63,11 @@ namespace kernel_selector
DispatchData kd;
- kd.gws0 = 32;
+ kd.gws0 = 16;
kd.gws1 = input_features * output_features;
kd.gws2 = x * y;
- kd.lws0 = 32;
+ kd.lws0 = 16;
kd.lws1 = 1;
kd.lws2 = 1;
kd.effiency = FORCE_PRIORITY_7;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
index 5369d7ac0..cbc0bd780 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
@@ -57,6 +57,7 @@ namespace kernel_selector
MakeJitConstant("FILTER_ARRAY_NUM", dp.split),
MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", dp.depthwiseSeparableOpt),
+ MakeJitConstant("FUSED_ELTWISE", dp.fused_eltwise)
});
return jit;
@@ -114,13 +115,15 @@ namespace kernel_selector
return{};
}
- auto cldnn_jit = GetJitConstants(orgParams);
- auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty());
kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
+ if (orgParams.fused_eltwise)
+ kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
index ac5baec0f..206614a70 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
@@ -34,6 +34,7 @@ namespace kernel_selector
uSize padding;
uint32_t split = 1;
bool depthwiseSeparableOpt = false;
+ bool fused_eltwise = false;
virtual std::string to_string() const override;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
index 613fbb4f8..5feac0ca5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
@@ -41,6 +41,22 @@ namespace kernel_selector
}
}
+ ParamsKey eltwise_params::GetParamsKey() const
+ {
+ ParamsKey k = base_params::GetParamsKey();
+ if (int8_quantization)
+ {
+ k.EnableInt8Quantization();
+ }
+
+ if (output_calibration)
+ {
+ k.EnableOutputCalibration();
+ }
+
+ return k;
+ }
+
bool EltwiseKernelBase::Validate(const Params& p, const optional_params& o) const
{
if (p.GetType() != KernelType::ELTWISE ||
@@ -56,7 +72,7 @@ namespace kernel_selector
return false;
}
- auto& operations = params.eltwiseParams.operations;
+ auto& operations = params.operations;
if (operations.size() == 0)
{
@@ -91,24 +107,24 @@ namespace kernel_selector
JitConstants jit = MakeBaseParamsJitConstants(params);
jit.AddConstants({
- MakeJitConstant("ELTWISE_LAYOUT_BASED", params.eltwiseParams.layoutBased),
- MakeJitConstant("QUANTIZATION_TERM", params.eltwiseParams.int8_quantization),
+ MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
+ MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
});
- if (params.eltwiseParams.int8_quantization)
+ if (params.int8_quantization)
{
- if (params.eltwiseParams.output_calibration)
+ if (params.output_calibration)
{
- jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.eltwiseParams.output_calibration));
+ jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
}
else
- jit.AddConstants({ MakeJitConstant("O_QF", params.eltwiseParams.output_quantization_factor) });
+ jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) });
}
std::string inputs_decls, vload_decls;
- auto& updateInputs = params.eltwiseParams.updateInputIds;
+ auto& updateInputs = params.updateInputIds;
for (size_t i = 0; i < params.inputs.size(); i++)
{
@@ -143,8 +159,8 @@ namespace kernel_selector
std::string do_eltwise;
- auto& operations = params.eltwiseParams.operations;
- auto& coefficients = params.eltwiseParams.coefficients;
+ auto& operations = params.operations;
+ auto& coefficients = params.coefficients;
for (size_t op_num = 0; op_num < operations.size(); op_num++)
{
@@ -187,7 +203,7 @@ namespace kernel_selector
cast_type = "(MAKE_VECTOR_TYPE(UNIT_TYPE, 8))";
op = "const MAKE_VECTOR_TYPE(UNIT_TYPE, 8) tmp" + op_num_str + " = ";
}
- else if(params.eltwiseParams.int8_quantization)
+ else if(params.int8_quantization)
{
cast_type = "(int)";
op = "const int tmp" + op_num_str + " = ";
@@ -251,7 +267,7 @@ namespace kernel_selector
jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
- if (params.eltwiseParams.layoutBased || params.eltwiseParams.int8_quantization)
+ if (params.layoutBased || params.int8_quantization)
{
jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
}
@@ -264,32 +280,27 @@ namespace kernel_selector
return GetJitConstantsCommon(params, false);
}
- KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const
+ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_params& params) const
{
- if (!Validate(params, options))
- {
- return{};
- }
-
- KernelData kd = KernelData::Default<eltwise_params>(params);
- eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
+ DispatchData kd;
- auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
- auto cldnn_jit = GetJitConstants(newParams);
- std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
-
- const auto& out = newParams.output;
- auto& kernel = kd.kernels[0];
- if (newParams.eltwiseParams.layoutBased || newParams.eltwiseParams.int8_quantization)
+ if (params.layoutBased || params.int8_quantization)
{
- kernel.workGroups.global = GetTensorFriendlyWorkGroups(newParams.inputs[0]);
+ auto global = GetTensorFriendlyWorkGroups(params.inputs[0]);
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
}
- else if (CheckInputsOutputNoPitchSameDims(newParams))
+ else if (CheckInputsOutputNoPitchSameDims(params))
{
- kernel.workGroups.global = { newParams.inputs[0].LogicalSize(), 1, 1 };
+ kd.gws0 = params.inputs[0].LogicalSize();
+ kd.gws1 = 1;
+ kd.gws2 = 1;
}
else
{
+ const auto& out = params.output;
+
std::vector<size_t> gws;
for (const auto& o : out.GetDims())
{
@@ -301,11 +312,42 @@ namespace kernel_selector
gws.push_back(1U);
}
- kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
+ kd.gws0 = gws[0];
+ kd.gws1 = gws[1];
+ kd.gws2 = gws[2] * gws[3];
}
- kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
- kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.eltwiseParams.int8_quantization, newParams.eltwiseParams.output_calibration);
+
+ auto local = GetOptimalLocalWorkGroupSizes( { kd.gws0, kd.gws1, kd.gws2 } );
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const
+ {
+ if (!Validate(params, options))
+ {
+ return{};
+ }
+
+ KernelData kd = KernelData::Default<eltwise_params>(params);
+ eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
+
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ DispatchData runInfo = SetDefault(newParams);
+
+ auto& kernel = kd.kernels[0];
+
+ kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
+ kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
+
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+ kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.int8_quantization, newParams.output_calibration);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
index 9d7127822..161140849 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
@@ -25,7 +25,7 @@ namespace kernel_selector
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct eltwise_params : public base_params
{
- eltwise_params() : base_params(KernelType::ELTWISE), eltwiseParams() {}
+ eltwise_params() : base_params(KernelType::ELTWISE) {}
struct InputType
{
@@ -87,35 +87,17 @@ namespace kernel_selector
uint32_t tmpId;
};
- struct DedicatedParams
- {
- std::vector<eltwise_params::Node> operations;
- std::vector<float> coefficients;
- std::vector<UpdateInputData> updateInputIds;
- bool layoutBased = false;
- bool int8_quantization = false;
- bool output_calibration = false;
- float output_quantization_factor = 1.0f;
- };
-
- DedicatedParams eltwiseParams;
+ std::vector<eltwise_params::Node> operations;
+ std::vector<float> coefficients;
+ std::vector<UpdateInputData> updateInputIds;
+
+ bool layoutBased = false;
+ bool int8_quantization = false;
+ bool output_calibration = false;
+ float output_quantization_factor = 1.0f;
+
MultiDataTensor output_calibration_factors;
-
- virtual ParamsKey GetParamsKey() const
- {
- ParamsKey k = base_params::GetParamsKey();
- if (eltwiseParams.int8_quantization)
- {
- k.EnableInt8Quantization();
- }
-
- if (eltwiseParams.output_calibration)
- {
- k.EnableOutputCalibration();
- }
-
- return k;
- }
+ virtual ParamsKey GetParamsKey() const;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -141,6 +123,7 @@ namespace kernel_selector
protected:
virtual bool Validate(const Params& p, const optional_params& o) const override;
virtual JitConstants GetJitConstants(const eltwise_params& params) const;
+ virtual DispatchData SetDefault(const eltwise_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
};
}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
new file mode 100644
index 000000000..571a013ce
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
@@ -0,0 +1,222 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+ ParamsKey EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ k.EnableInt8Quantization();
+ k.EnableOutputCalibration();
+ return k;
+ }
+
+ EltwiseKernelBase::DispatchData EltwiseKernel_fs_bs_yx_bsv4_fsv32::SetDefault(const eltwise_params& params) const
+ {
+ DispatchData kd;
+
+ kd.gws0 = params.output.X().v;
+ kd.gws1 = params.output.Y().v;
+ // we process 4 batches and 4 features per workitem
+ kd.gws2 = (params.output.Batch().v / 4) * (params.output.Feature().v / 4);
+ kd.lws0 = 1;
+ kd.lws1 = 1;
+ kd.lws2 = 8;
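+        // Dispatch sketch: with bsv4_fsv32 blocking each work-item covers a 4-batch
+        // x 4-feature tile, e.g. batch=8, features=32 gives gws2 = (8/4) * (32/4) = 16,
+        // executed in sub-groups of 8 (lws2).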
+
+ return kd;
+ }
+
+ JitConstants EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetJitConstants(const eltwise_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ const size_t in_x_pitch = 32 * 4;
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
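+        // Block pitches for fs_bs_yx_bsv4_fsv32: 4 batches x 32 features per block,
+        // hence the 32 * 4 element x-pitch.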
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+ ///////////////
+ jit.AddConstants({
+ MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
+ MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
+ });
+
+ if (params.int8_quantization)
+ {
+ if (params.output_calibration)
+ {
+ jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
+ jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
+
+ }
+ else
+ jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) });
+ }
+
+ std::string inputs_decls;
+ auto& updateInputs = params.updateInputIds;
+
+ for (size_t i = 0; i < params.inputs.size(); i++)
+ {
+            // 'const' is added only to inputs that will not be updated
+ std::string const_str = "const";
+ for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
+ {
+ if (updateInputs[update_input_idx].inputId == i)
+ {
+ const_str = "";
+ break;
+ }
+ }
+
+ inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
+ }
+
+ jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
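+        // For a typical two-input INT8 eltwise this yields, e.g.:
+        // "const __global char* input0, const __global char* input1, "
+        // (the exact element type name comes from toCLType).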
+ jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));
+
+ std::string do_eltwise;
+
+ auto& operations = params.operations;
+ auto& coefficients = params.coefficients;
+
+ for (size_t op_num = 0; op_num < operations.size(); op_num++)
+ {
+ const std::string op_num_str = std::to_string(op_num);
+ const auto& ew = operations[op_num];
+
+ for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++)
+ {
+ const auto& input = ew.inputs[input_idx];
+ const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);
+ switch (input.mode)
+ {
+ case EltwiseInputMode::SCALAR:
+ jit.AddConstant(MakeJitConstant(name, input.scalar));
+ break;
+ case EltwiseInputMode::INPUT_BUFFER:
+ jit.AddConstant(MakeJitConstant(name, "GET_INPUT(input" + std::to_string(input.index) + ", INPUT" + std::to_string(input.index) + ")"));
+ break;
+ case EltwiseInputMode::OUTPUT_BUFFER:
+ jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
+ break;
+ case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER:
+ jit.AddConstant(MakeJitConstant(name, "input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
+ break;
+ case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX:
+ jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
+ break;
+ default:
+ break;
+ }
+ }
+ std::string input0_str, input1_str, cast_type, op;
+
+ if (params.int8_quantization)
+ {
+ cast_type = "(int16)";
+ op = "const int16 tmp" + op_num_str + " = ";
+ }
+ else
+ {
+ cast_type = "(UNIT_TYPE)";
+ op = "const UNIT_TYPE tmp" + op_num_str + " = ";
+ }
+
+ input0_str = cast_type + "INPUT_" + op_num_str + "_0";
+ input1_str = cast_type + "INPUT_" + op_num_str + "_1";
+
+ if (ew.mode == EltwiseMode::ADD)
+ {
+ std::vector<std::string> coeff_strings(ew.inputs.size(), "");
+ for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++)
+ {
+ const auto& input = ew.inputs[input_idx];
+ if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size())
+ {
+ const float c = coefficients[input.index];
+ if (c != 1.0f)
+ coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
+ }
+ }
+
+ input0_str = coeff_strings[0] + input0_str;
+ input1_str = coeff_strings[1] + input1_str;
+ }
+
+
+ switch (ew.mode)
+ {
+ case EltwiseMode::ADD: op += input0_str + " + " + input1_str; break;
+ case EltwiseMode::SUB: op += input0_str + " - " + input1_str; break;
+ case EltwiseMode::MUL: op += input0_str + " * " + input1_str; break;
+ case EltwiseMode::DIV: op += input0_str + " / " + input1_str; break;
+ case EltwiseMode::MODULU: op += cast_type + "fmod(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::MIN: op += cast_type + "fmin(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::MAX: op += cast_type + "fmax(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::POW: op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::SQRT: op += cast_type + "sqrt(" + input0_str + ")"; break;
+ case EltwiseMode::RSQRT: op += cast_type + "1/sqrt(" + input0_str + ")"; break;
+ case EltwiseMode::ASSIGN: op += input0_str; break;
+ default:
+ break;
+ }
+
+ std::string opname = "OPERATION" + op_num_str;
+ jit.AddConstant(MakeJitConstant(opname, op));
+ do_eltwise += "\\\n\t" + opname + ";";
+ }
+
+ for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
+ do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) +
+ "[GET_INDEX(INPUT, " + std::to_string(updateInputs[update_input_idx].inputId) +
+ ")] = tmp" + std::to_string(updateInputs[update_input_idx].tmpId) + ";";
+
+ do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
+
+ jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
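+        // For a single quantized ADD operation the generated macro body reads:
+        //   const int16 tmp0 = (int16)INPUT_0_0 + (int16)INPUT_0_1;
+        //   res = tmp0;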
+
+ if (params.layoutBased || params.int8_quantization)
+ {
+ jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
+ }
+
+ ///////////////
+ return jit;
+ }
+
+ KernelsData EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options);
+ }
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
new file mode 100644
index 000000000..b1fb3e950
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "eltwise_kernel_base.h"
+
+namespace kernel_selector
+{
+ class EltwiseKernel_fs_bs_yx_bsv4_fsv32 : public EltwiseKernelBase
+ {
+ public:
+ EltwiseKernel_fs_bs_yx_bsv4_fsv32() : EltwiseKernelBase("eltwise_fs_bs_yx_bsv4_fsv32") {}
+ virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ protected:
+ JitConstants GetJitConstants(const eltwise_params& params) const override;
+ virtual DispatchData SetDefault(const eltwise_params& params) const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
index 3840f463e..3a7776575 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
@@ -22,8 +22,16 @@ namespace kernel_selector {
ParamsKey EltwiseKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
@@ -35,6 +43,25 @@ namespace kernel_selector {
return k;
}
+ bool EltwiseKernelRef::Validate(const Params& p, const optional_params& o) const
+ {
+ if (!EltwiseKernelBase::Validate(p, o))
+ {
+ return false;
+ }
+
+ const eltwise_params& params = static_cast<const eltwise_params&>(p);
+ for (size_t i = 0; i < params.inputs.size(); i++)
+ {
+ if (params.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+ }
+ if (params.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+
+ return true;
+ }
+
KernelsData EltwiseKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
{
return GetCommonKernelsData(params, options);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
index 2249dc8c9..c2ccf054d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
@@ -28,5 +28,8 @@ namespace kernel_selector
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
virtual ParamsKey GetSupportedKey() const override;
+ protected:
+ bool Validate(const Params& p, const optional_params& o) const override;
+
};
}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
index d71deddfb..cf7565216 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
@@ -17,13 +17,15 @@
#include "eltwise_kernel_selector.h"
#include "eltwise_kernel_ref.h"
#include "eltwise_kernel_vload8.h"
-
+#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
+
namespace kernel_selector
{
eltwise_kernel_selector::eltwise_kernel_selector()
{
Attach<EltwiseKernelRef>();
Attach<EltwiseKernel_vload8>();
+ Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>();
}
KernelsData eltwise_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
index 934bc44cd..5ceb75084 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
@@ -45,6 +45,15 @@ namespace kernel_selector {
}
const auto& ewParams = static_cast<const eltwise_params&>(params);
+
+ for (size_t i = 0; i < ewParams.inputs.size(); i++)
+ {
+ if (ewParams.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+ }
+ if (ewParams.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+
const auto& output = ewParams.output;
const auto count = output.PhysicalSize();
@@ -62,16 +71,16 @@ namespace kernel_selector {
//TODO: add support to this implementation when user requests input values updates
bool bCheckUpdateInput = true;
- if (!ewParams.eltwiseParams.updateInputIds.empty())
+ if (!ewParams.updateInputIds.empty())
bCheckUpdateInput = false;
//TODO: add support for reading from output buffer and using its values in computation
bool bCheckUseOutput = true;
- for (size_t op = 0; op < ewParams.eltwiseParams.operations.size(); op++)
+ for (size_t op = 0; op < ewParams.operations.size(); op++)
{
- for (size_t input_idx = 0; input_idx < ewParams.eltwiseParams.operations[op].inputs.size(); input_idx++)
+ for (size_t input_idx = 0; input_idx < ewParams.operations[op].inputs.size(); input_idx++)
{
- if (ewParams.eltwiseParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER)
+ if (ewParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER)
{
bCheckUseOutput = false;
break;
@@ -114,7 +123,7 @@ namespace kernel_selector {
auto& kernel = kd.kernels[0];
kernel.workGroups.global = { std::max(newParams.inputs[0].LogicalSize()/8, (size_t)1), 1, 1 };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
kd.estimatedTime = FORCE_PRIORITY_8;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
index d4c9b126c..f126daa94 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
@@ -103,7 +103,7 @@ namespace kernel_selector
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty());
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
index 6702ebc69..20e6e8dca 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
@@ -49,7 +49,7 @@ namespace kernel_selector
std::unique_ptr<FullyConnectedKernelBase::DispatchData> FullyConnectedKernelBase::SetDefault(const fully_connected_params& params) const
{
- std::unique_ptr<DispatchData> dispatchData = std::make_unique<DispatchData>();
+ std::unique_ptr<DispatchData> dispatchData = std::unique_ptr<DispatchData>(new DispatchData());
dispatchData->fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
// Determine global work sizes.
@@ -122,10 +122,10 @@ namespace kernel_selector
std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, *runInfo.get(), kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
+ FillCLKernelData(kernel, *runInfo.get(), params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
kd.estimatedTime = estimated_time;
kd.autoTuneIndex = -1;
return{ kd };
}
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
index 0f836133b..b98b528a8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
@@ -60,7 +60,7 @@ namespace kernel_selector
std::unique_ptr<FullyConnected_bs_f_bsv16_b1::FullyConnectedKernelBase::DispatchData> FullyConnected_bs_f_bsv16_b1::SetDefault(const fully_connected_params& arg) const
{
- auto run_info = std::make_unique<DispatchData>(*FullyConnectedKernelBase::SetDefault(arg).get());
+ auto run_info = std::unique_ptr<DispatchData>(new DispatchData(*FullyConnectedKernelBase::SetDefault(arg)));
// Properties of chunk and unit.
const char* chunk_type = "uint";
@@ -100,4 +100,4 @@ namespace kernel_selector
{
return GetCommonKernelsData(params, optParams, DataLayout::bf, {WeightsLayout::os_i_osv16}, FORCE_PRIORITY_5);
}
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
index 1ceb3ebc9..b32c8a54e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
@@ -37,7 +37,7 @@ namespace kernel_selector
std::unique_ptr<FullyConnected_fb_io_block::FullyConnectedKernelBase::DispatchData> FullyConnected_fb_io_block::SetDefault(const fully_connected_params& arg) const
{
- auto kd = std::make_unique<DispatchData>(*FullyConnectedKernelBase::SetDefault(arg).get());
+ auto kd = std::unique_ptr<DispatchData>(new DispatchData(*FullyConnectedKernelBase::SetDefault(arg)));
const auto& output = arg.output;
auto batch_size = output.Batch().v;
@@ -146,4 +146,4 @@ namespace kernel_selector
// return GetCommonKernelsData(params, optParams, DataLayout::fb, WeightsLayout::io, estimated_time);
return GetCommonKernelsData(params, optParams, DataLayout::yxfb, { WeightsLayout::yxio }, estimated_time);
}
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp
new file mode 100644
index 000000000..46e4dea8d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp
@@ -0,0 +1,117 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fully_connected_kernel_mmad_batched.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+ ParamsKey FullyConnected_mmad_batched::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::bf);
+ k.EnableBiasPerOutput();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ k.EnableInt8Quantization();
+ k.EnableOutputCalibration();
+ return k;
+ }
+
+ bool FullyConnected_mmad_batched::Validate(const Params& p, const optional_params& o) const
+ {
+ if (!FullyConnectedKernelBase::Validate(p, o))
+ {
+ return false;
+ }
+
+ const auto& params = static_cast<const fully_connected_params&>(p);
+
+ // we do not support padded input
+ if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0)
+ return false;
+
+ size_t batch = params.inputs[0].Batch().v;
+ // batch must be a multiple of 8
+ if (batch % 8 != 0)
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ JitConstants FullyConnected_mmad_batched::GetJitConstants(const fully_connected_params& params, const DispatchData& runInfo) const
+ {
+ auto jit = Parent::GetJitConstants(params, runInfo);
+
+ jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1));
+
+ // pitch for special block format used in this kernel
+ const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
+ const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
+ jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
+
+ const size_t in_x_pitch = 32 * 4;
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
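+        // Same fs_bs_yx_bsv4_fsv32 block-pitch arithmetic as in
+        // convolution_kernel_mmad_batched.cpp; see the worked example there.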
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+ return jit;
+ }
+
+ std::unique_ptr<FullyConnected_mmad_batched::Parent::DispatchData> FullyConnected_mmad_batched::SetDefault(const fully_connected_params& params) const
+ {
+ auto runInfo = Parent::SetDefault(params);
+
+ constexpr size_t sub_group_size = 8;
+
+ const auto of_maps = params.output.Feature().v;
+ const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
+
+ runInfo->gws0 = params.output.Batch().v / 8; // we process 8 batches in a single WG
+ runInfo->gws1 = of_threads_per_batch;
+ runInfo->gws2 = 1;
+
+ runInfo->lws0 = 1;
+ runInfo->lws1 = sub_group_size;
+ runInfo->lws2 = 1;
+
+ runInfo->effiency = FORCE_PRIORITY_1;
+ return std::move(runInfo);
+ }
+
+ KernelsData FullyConnected_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, DataLayout::fs_bs_yx_bsv4_fsv32,
+ { WeightsLayout::os_is_yx_isa8_osv8_isv4 }, FORCE_PRIORITY_1);
+ }
+} \ No newline at end of file
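Note: the pitch arithmetic in GetJitConstants above is easier to check in isolation. The following standalone sketch reproduces it with hypothetical shapes (ifm = 64, a 1x1 filter, a 7x7 input, batch = 8); Align is reimplemented locally, and LogicalDimPadded() is taken to equal the plain dimension since Validate rejects padded input:

    #include <cstddef>
    #include <iostream>

    // Round v up to the nearest multiple of m (mirrors kernel_selector's Align).
    static size_t Align(size_t v, size_t m) { return ((v + m - 1) / m) * m; }

    int main() {
        const size_t ifm = 64, filter_x = 1, filter_y = 1;  // hypothetical
        const size_t input_x = 7, input_y = 7, batch = 8;   // hypothetical

        // Weights: each OFM block spans ifm/32 slices of 4*8*8 values.
        const size_t ifm_32_aligned = Align(ifm, 32);                         // 64
        const size_t filter_ofm_block_pitch =
            (ifm_32_aligned / 32) * filter_x * filter_y * 4 * 8 * 8;          // 512

        // Input: 32 features x 4 batches are interleaved at every (x, y).
        const size_t in_x_pitch = 32 * 4;                                     // 128
        const size_t in_y_pitch = in_x_pitch * input_x;                       // 896
        const size_t in_b_block_pitch = in_y_pitch * input_y;                 // 6272
        const size_t in_f_block_pitch = in_b_block_pitch * ((batch + 3) / 4); // 12544

        std::cout << filter_ofm_block_pitch << ' ' << in_f_block_pitch << '\n';
        return 0;
    }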
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h
new file mode 100644
index 000000000..61af89f19
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h
@@ -0,0 +1,38 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fully_connected_kernel_base.h"
+
+namespace kernel_selector {
+
+ class FullyConnected_mmad_batched : public FullyConnectedKernelBase
+ {
+ public:
+ using Parent = FullyConnectedKernelBase;
+
+ FullyConnected_mmad_batched() : Parent("fully_connected_gpu_mmad_batched") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+
+ protected:
+ bool Validate(const Params& p, const optional_params& o) const override;
+ JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
+ std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const override;
+ };
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
index d7c1a1a85..529e1ca33 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
@@ -30,6 +30,7 @@
#include "fully_connected_kernel_bf_io_input_spatial.h"
#include "fully_connected_kernel_image_tutorial.h"
#include "fully_connected_kernel_MMAD.h"
+#include "fully_connected_kernel_mmad_batched.h"
namespace kernel_selector {
@@ -49,6 +50,7 @@ namespace kernel_selector {
Attach<FullyConnected_fb_io_b8_f8>();
Attach<FullyConnected_bf_io_input_spatial>();
Attach<FullyConnectedKernelMMAD>();
+ Attach<FullyConnected_mmad_batched>();
}
KernelsData fully_connected_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
index 94d0b4751..e40848af7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
@@ -80,7 +80,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = runInfo.effiency;
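Note: this is the first of many identical call-site updates in this patch. FillCLKernelData (and GetKernelString for the LSTM kernels) now takes params.engineInfo, so JIT generation can query device capabilities such as fp16 support (used below in reorder_kernel_base.cpp). The pattern at every call site:

    // Before: JIT generation had no access to device capabilities.
    FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point,
                     ROUND_ROBIN, true, !orgParams.bias.empty());
    // After: EngineInfo is threaded through alongside the dispatch data.
    FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit,
                     entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());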
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
index 55d8bdf88..67328ac99 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
@@ -82,7 +82,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
if (orgParams.use_momentum)
{
kernel.arguments.push_back({ ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp
index 9ec0f8e17..bb8380457 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp
@@ -24,7 +24,6 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp
new file mode 100644
index 000000000..12af8a1c5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp
@@ -0,0 +1,98 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gemm_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+ JitConstants GemmKernelBase::GetJitConstants(const gemm_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ jit.AddConstants({
+ MakeJitConstant("X1", params.inputs[0].X().v),
+ MakeJitConstant("Y1", params.inputs[0].Y().v),
+ MakeJitConstant("X2", params.inputs[1].X().v),
+ MakeJitConstant("Y2", params.inputs[1].Y().v),
+ MakeJitConstant("ALPHA", params.alpha),
+ MakeJitConstant("BETA", params.beta),
+ MakeJitConstant("TRANSPOSE_INPUT1", params.transpose_input1),
+ MakeJitConstant("TRANSPOSE_INPUT2", params.transpose_input2),
+ });
+
+ if (params.inputs.size() > 2)
+ {
+ jit.AddConstants({ MakeJitConstant("OUT_BIAS_TERM", true) });
+ }
+ else
+ jit.AddConstants({ MakeJitConstant("OUT_BIAS_TERM", false) });
+
+ return jit;
+ }
+
+ GemmKernelBase::DispatchData GemmKernelBase::SetDefault(const gemm_params& params) const
+ {
+ const auto& output = params.output;
+
+ DispatchData kd;
+
+ kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+ std::vector<size_t> global{ params.inputs[0].Y().v, params.inputs[1].X().v, output.Batch().v };
+
+ if (params.transpose_input1 && params.transpose_input2)
+ global = { params.inputs[0].X().v, params.inputs[1].Y().v, output.Batch().v };
+ else if (params.transpose_input1)
+ global = { params.inputs[0].X().v, params.inputs[1].X().v, output.Batch().v };
+ else if (params.transpose_input2)
+ global = { params.inputs[0].Y().v, params.inputs[1].Y().v, output.Batch().v };
+
+ const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
+
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+ {
+ assert(params.GetType() == KernelType::GEMM);
+
+ const auto& prim_params = static_cast<const gemm_params&>(params);
+
+ auto run_info = SetDefault(prim_params);
+ KernelData k_data = KernelData::Default<gemm_params>(params);
+
+ auto cldnn_jit = GetJitConstants(prim_params);
+ auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+ auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = k_data.kernels[0];
+ FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, false, false, (uint32_t)prim_params.inputs.size());
+
+ k_data.estimatedTime = estimated_time;
+
+ return { k_data };
+ }
+} \ No newline at end of file
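Note: SetDefault launches one work item per output element, and the transpose flags decide which input dimensions supply the output's rows and columns. The mapping, restated as a small standalone function (X() is width, Y() is height, as in the tensor API):

    #include <array>
    #include <cstddef>

    // Returns {gws0, gws1, gws2} for C = op(A) * op(B) over `batch` matrices.
    std::array<size_t, 3> GemmGlobal(size_t x1, size_t y1, size_t x2, size_t y2,
                                     size_t batch, bool t1, bool t2) {
        if (t1 && t2) return { x1, y2, batch };  // A^T * B^T
        if (t1)       return { x1, x2, batch };  // A^T * B
        if (t2)       return { y1, y2, batch };  // A   * B^T
        return { y1, x2, batch };                // A   * B
    }

In every branch the first component is op(A)'s row count and the second is op(B)'s column count, i.e. the output height and width.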
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h
new file mode 100644
index 000000000..643a0bec7
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // gemm_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct gemm_params : public base_params
+ {
+ gemm_params() :
+ base_params(KernelType::GEMM),
+ alpha(1.0f),
+ beta(0.0f),
+ transpose_input1(false),
+ transpose_input2(false)
+ {}
+
+ float alpha;
+ float beta;
+ bool transpose_input1;
+ bool transpose_input2;
+
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // gemm_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct gemm_optional_params : optional_params
+ {
+ gemm_optional_params()
+ : optional_params(KernelType::GEMM)
+ {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // GemmKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class GemmKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+
+ using DispatchData = CommonDispatchData;
+
+ protected:
+ JitConstants GetJitConstants(const gemm_params& params) const;
+ DispatchData SetDefault(const gemm_params& params) const;
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+ };
+}
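Note: a sketch of how a caller might populate gemm_params for C = alpha * op(A) * op(B) + beta * C. Field names come from the struct above; the DataTensor setup is elided because it lies outside this diff:

    kernel_selector::gemm_params params;
    params.alpha = 1.0f;             // scales op(A) * op(B)
    params.beta  = 0.0f;             // scales the optional third input
    params.transpose_input1 = false;
    params.transpose_input2 = true;  // treat input 2 as transposed
    // params.inputs / params.output are DataTensors filled by the caller;
    // a third input makes GetJitConstants define OUT_BIAS_TERM as true.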
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp
new file mode 100644
index 000000000..585d9d90d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp
@@ -0,0 +1,41 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gemm_kernel_ref.h"
+
+namespace kernel_selector
+{
+ ParamsKey GemmKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::bfyx);
+
+ k.EnableBatching();
+
+ return k;
+ }
+
+ KernelsData GemmKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+ }
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h
new file mode 100644
index 000000000..89727597d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "gemm_kernel_base.h"
+
+
+namespace kernel_selector
+{
+ class GemmKernelRef : public GemmKernelBase
+ {
+ public:
+ GemmKernelRef() : GemmKernelBase("gemm_ref") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp
new file mode 100644
index 000000000..a31f3cb9f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gemm_kernel_selector.h"
+#include "gemm_kernel_ref.h"
+
+namespace kernel_selector
+{
+ gemm_kernel_selector::gemm_kernel_selector()
+ {
+ Attach<GemmKernelRef>();
+ }
+
+ KernelsData gemm_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::GEMM);
+ }
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h
new file mode 100644
index 000000000..7a7896afd
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+ class gemm_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static gemm_kernel_selector &Instance() {
+ static gemm_kernel_selector instance;
+ return instance;
+ }
+
+ gemm_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+} \ No newline at end of file
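Note: both new selectors follow the singleton pattern already used across kernel_selector. Typical usage (a sketch):

    using namespace kernel_selector;
    gemm_params params;            // filled in by the caller
    gemm_optional_params options;
    KernelsData best =
        gemm_kernel_selector::Instance().GetBestKernels(params, options);
    // GetNaiveBestKernel keeps only attached kernels whose ParamsKey supports
    // `params` and ranks the candidates by their estimated-time priority.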
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp
new file mode 100644
index 000000000..c0dc0851c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp
@@ -0,0 +1,86 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "index_select_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+ JitConstants IndexSelectKernelBase::GetJitConstants(const index_select_params& params)
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ jit.AddConstant(MakeJitConstant(toString(params.axis), ""));
+
+ return jit;
+ }
+
+ IndexSelectKernelBase::DispatchData IndexSelectKernelBase::SetDefault(const index_select_params& params)
+ {
+ const auto& output = params.output;
+ const auto& indices = params.inputs.at(1);
+ DispatchData kd;
+
+ kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+ std::vector<size_t> global;
+ if (params.axis == IndexSelectAxis::BATCH)
+ {
+ global = { 1, indices.X().v, output.Feature().v };
+ }
+ else if (params.axis == IndexSelectAxis::X || params.axis == IndexSelectAxis::Y)
+ {
+ global = { output.Batch().v, indices.X().v, output.Feature().v };
+ }
+ else if (params.axis == IndexSelectAxis::FEATURE)
+ {
+ global = { output.Batch().v, indices.X().v, output.Y().v };
+ }
+ const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
+
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData IndexSelectKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+ {
+ assert(params.GetType() == KernelType::INDEX_SELECT);
+
+ const auto& prim_params = static_cast<const index_select_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+
+ auto run_info = SetDefault(prim_params);
+ KernelData k_data = KernelData::Default<index_select_params>(params);
+
+ auto cldnn_jit = GetJitConstants(prim_params);
+ auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+ auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = k_data.kernels[0];
+ FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, false, false, (uint32_t)prim_params.inputs.size());
+
+ k_data.estimatedTime = estimated_time;
+
+ return {k_data};
+ }
+}
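Note: the dispatch replaces the gathered axis with one work item per index; the indices tensor is one-dimensional along X. Restated as a standalone sketch:

    #include <array>
    #include <cstddef>

    enum class Axis { BATCH, FEATURE, X, Y };

    // Mirrors IndexSelectKernelBase::SetDefault's global work-size choice.
    std::array<size_t, 3> IndexSelectGlobal(Axis axis, size_t out_b, size_t out_f,
                                            size_t out_y, size_t num_indices) {
        switch (axis) {
        case Axis::BATCH:           return { 1, num_indices, out_f };
        case Axis::X: case Axis::Y: return { out_b, num_indices, out_f };
        case Axis::FEATURE:         return { out_b, num_indices, out_y };
        }
        return { 1, 1, 1 };  // unreachable: all axes handled above
    }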
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h
new file mode 100644
index 000000000..c7abe43bc
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h
@@ -0,0 +1,61 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // index_select_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct index_select_params : public base_params
+ {
+ index_select_params()
+ : base_params(KernelType::INDEX_SELECT)
+ {}
+
+ IndexSelectAxis axis = IndexSelectAxis::BATCH;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // index_select_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct index_select_optional_params : optional_params
+ {
+ index_select_optional_params()
+ : optional_params(KernelType::INDEX_SELECT)
+ {}
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // IndexSelectKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class IndexSelectKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+ virtual ~IndexSelectKernelBase() {}
+
+ using DispatchData = CommonDispatchData;
+
+ protected:
+ static JitConstants GetJitConstants(const index_select_params& params);
+ static DispatchData SetDefault(const index_select_params& params);
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp
new file mode 100644
index 000000000..b5ab92dc1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp
@@ -0,0 +1,58 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "index_select_kernel_ref.h"
+
+
+namespace kernel_selector
+{
+ ParamsKey IndexSelectKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+ k.EnableInputDataType(Datatype::INT32);
+
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+ k.EnableOutputDataType(Datatype::INT32);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::yxfb);
+
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+
+ k.EnableBatching();
+
+ k.EnableIndexSelectAxis(IndexSelectAxis::BATCH);
+ k.EnableIndexSelectAxis(IndexSelectAxis::FEATURE);
+ k.EnableIndexSelectAxis(IndexSelectAxis::Y);
+ k.EnableIndexSelectAxis(IndexSelectAxis::X);
+
+ k.EnableDifferentTypes();
+
+ return k;
+ }
+
+ KernelsData IndexSelectKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h
new file mode 100644
index 000000000..3dd16198f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "index_select_kernel_base.h"
+
+
+namespace kernel_selector
+{
+ class IndexSelectKernelRef : public IndexSelectKernelBase
+ {
+ public:
+ IndexSelectKernelRef() : IndexSelectKernelBase("index_select_gpu_ref") {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp
new file mode 100644
index 000000000..3d1693046
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "index_select_kernel_selector.h"
+#include "index_select_kernel_ref.h"
+
+namespace kernel_selector
+{
+ index_select_kernel_selector::index_select_kernel_selector()
+ {
+ Attach<IndexSelectKernelRef>();
+ }
+
+ KernelsData index_select_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::INDEX_SELECT);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h
new file mode 100644
index 000000000..21363f9f5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+
+namespace kernel_selector
+{
+ class index_select_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static index_select_kernel_selector &Instance() {
+ static index_select_kernel_selector instance;
+ return instance;
+ }
+
+ index_select_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp
index 8176f002b..af6737941 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp
@@ -86,7 +86,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 2);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
kd.estimatedTime = FORCE_PRIORITY_9;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp
index de31047d1..bb3f20f7f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp
@@ -81,7 +81,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 2);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp
index 4907d9992..9165ea692 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp
@@ -103,7 +103,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entryPoint);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
index a74b21e27..6170abd46 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
@@ -77,7 +77,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
kernel.workGroups.global = { out.X().v, out.Batch().v, 1 };
- kernel.kernelString = GetKernelString(kernelName, jit, entryPoint);
+ kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
if (orgParams.has_cell) {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp
index 6afb8504c..a068f9ae4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp
@@ -22,8 +22,10 @@ namespace kernel_selector {
ParamsKey LSTMEltKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
index 6d2c9bcf0..703008546 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
@@ -35,6 +35,7 @@ namespace kernel_selector
}
jit.AddConstants({ MakeJitConstant("WEIGHTS", weights)});
+ jit.AddConstants({ MakeJitConstant("DIRECTION", params.direction)});
return jit;
}
@@ -64,7 +65,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
kernel.workGroups.global = { out.X().v, out.Batch().v, 1 };
- kernel.kernelString = GetKernelString(kernelName, jit, entryPoint);
+ kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
kernel.arguments.push_back({ ArgumentDescriptor::Types::WEIGHTS, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
index 6fd517586..e766120e0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
@@ -34,6 +34,7 @@ namespace kernel_selector
DataTensor hidden;
bool hasBias = false;
bool hasHidden = false;
+ uint32_t direction = 0;
void SetBias(const DataTensor& v) {
bias = v;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp
index 167afcb5f..6484dd951 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp
@@ -22,8 +22,10 @@ namespace kernel_selector {
ParamsKey LSTMGemmKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp
index faea3ea6e..d6e036f40 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp
@@ -91,7 +91,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp
index 23de0ff1a..4775a41b7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp
@@ -82,7 +82,7 @@ namespace kernel_selector
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, finalKernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entry_point);
kd.estimatedTime = estimated_time;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp
index a50849125..b4e4c04c7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp
@@ -82,7 +82,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ ArgumentDescriptor::Types::SCALE_TABLE, 0 });
kd.estimatedTime = estimated_time;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
index 057c4e655..ca6977952 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
@@ -24,8 +24,14 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
@@ -72,7 +78,7 @@ namespace kernel_selector
kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc(1, false, false);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp
index 13290c42d..9e5a9ad50 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp
@@ -129,7 +129,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
if(orgParams.poolType == PoolType::MAX_WITH_ARGMAX)
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp
new file mode 100644
index 000000000..5157b4d5e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp
@@ -0,0 +1,83 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h"
+
+namespace kernel_selector
+{
+ ParamsKey PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ k.EnablePoolType(PoolType::MAX);
+ k.EnablePoolType(PoolType::AVG);
+ k.EnablePoolRemainder(PoolRemainder::FLOOR);
+ k.EnablePoolRemainder(PoolRemainder::CEIL);
+ k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
+ k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC);
+ k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC_WITH_PADDING);
+ k.EnableDifferentTypes();
+ return k;
+ }
+
+ PoolingKernelBase::DispatchData PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::SetDefault(const pooling_params& params) const
+ {
+ constexpr int simdSize = 8;
+
+ DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+
+ runInfo.gws0 = params.output.X().v;
+ runInfo.gws1 = params.output.Y().v;
+ // The output is in fs_bs_yx_bsv4_fsv32 format; each work item processes 4 batches and 4 features.
+ runInfo.gws2 = (RoundUp(params.output.Feature().v, 32) * RoundUp(params.output.Batch().v, 4)) / (4 * 4);
+
+ runInfo.lws0 = 1;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = simdSize;
+
+ return runInfo;
+ }
+
+ JitConstants PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetJitConstants(const pooling_params& params, DispatchData kd) const
+ {
+ auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+
+ const size_t in_x_pitch = 32 * 4;
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+ return jit;
+ }
+
+ KernelsData PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_1);
+ }
+} \ No newline at end of file
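Note: the gws2 arithmetic packs a 4 (batch) x 4 (feature) tile into each work item. A worked example with a hypothetical output of 48 features and 6 batches:

    #include <cstddef>
    #include <iostream>

    static size_t RoundUp(size_t v, size_t m) { return ((v + m - 1) / m) * m; }

    int main() {
        const size_t features = 48, batch = 6;  // hypothetical output shape
        const size_t gws2 = (RoundUp(features, 32) * RoundUp(batch, 4)) / (4 * 4);
        std::cout << gws2 << '\n';  // (64 * 8) / 16 == 32 work items
        return 0;
    }

lws2 = 8 then runs those work items in the SIMD-8 subgroups the kernel expects (simdSize above).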
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h
new file mode 100644
index 000000000..efb5c67cd
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h
@@ -0,0 +1,36 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "pooling_kernel_base.h"
+
+namespace kernel_selector
+{
+ class PoolingKerneGPU_fs_bs_yx_bsv4_fsv32 : public PoolingKernelBase
+ {
+ public:
+ PoolingKerneGPU_fs_bs_yx_bsv4_fsv32() : PoolingKernelBase("pooling_gpu_fs_bs_yx_bsv4_fsv32") {}
+ virtual ~PoolingKerneGPU_fs_bs_yx_bsv4_fsv32() {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+ DispatchData SetDefault(const pooling_params& params) const override;
+ protected:
+ JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+
+ };
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
index 5a7d83b47..91ec4d2dc 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
@@ -22,6 +22,7 @@
#include "pooling_kernel_gpu_byxf_padding_opt.h"
#include "pooling_kernel_gpu_byxf_af32.h"
#include "pooling_kernel_gpu_int8_ref.h"
+#include "pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h"
namespace kernel_selector {
@@ -34,6 +35,7 @@ namespace kernel_selector {
Attach<PoolingKernelGPUByxfPaddingOpt>();
Attach<PoolingKernelGPUInt8Ref>();
Attach<PoolingKerneGPU_byxf_af32>();
+ Attach<PoolingKerneGPU_fs_bs_yx_bsv4_fsv32>();
}
KernelsData pooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp
index 62420b510..6e5577a84 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp
@@ -92,7 +92,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = FORCE_PRIORITY_9;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp
index 87ee70660..ba6f7ce1a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp
@@ -24,11 +24,15 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
index d050de529..867a3c8b9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
@@ -69,6 +69,14 @@ namespace kernel_selector
MakeJitConstant("OUTPUT", output),
};
+ if (fp16Supported)
+ {
+ jit.Merge(MakeUnitTypeJitConstants(Datatype::F16));
+ }
+ else
+ {
+ jit.Merge(MakeUnitTypeJitConstants(Datatype::F32));
+ }
return jit;
}
@@ -185,7 +193,7 @@ namespace kernel_selector
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments = GetArgsDesc(1, false, false);
@@ -215,7 +223,7 @@ namespace kernel_selector
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments = GetArgsDesc(1, false, false);
if (newParams.mode == MeanSubtractMode::IN_BUFFER)
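Note: the fp16 branch added to GetJitConstants makes reorder degrade gracefully on devices without half-precision support: when fp16Supported is false, the unit-type JIT constants are generated for F32 instead of F16. Assuming MakeUnitTypeJitConstants emits the usual UNIT_TYPE family of macros, the effect is roughly:

    // fp16Supported == true:   #define UNIT_TYPE half   (plus related macros)
    // fp16Supported == false:  #define UNIT_TYPE float  (plus related macros)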
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp
index 9ae8d10d9..0462e4a8f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp
@@ -87,7 +87,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = FORCE_PRIORITY_9;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
index 2984e2e8f..9c9c760ee 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
@@ -24,8 +24,14 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
@@ -60,7 +66,7 @@ namespace kernel_selector
kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc(1, false, false);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
index 0683beb3e..64dde2c32 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
@@ -1,5 +1,5 @@
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -27,6 +27,9 @@ namespace kernel_selector {
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::brfyx);
+ k.EnablePoolType(PoolType::MAX);
+ k.EnablePoolType(PoolType::AVG);
+ k.EnablePoolType(PoolType::BILINEAR);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
@@ -69,7 +72,6 @@ namespace kernel_selector {
});
jit.AddConstants({
- MakeJitConstant("MAX_POOL", rp.mode == PoolType::MAX),
MakeJitConstant("USE_OLD_SCALE_AND_ROUNDING", rp.groupSize == 0)
});
@@ -94,11 +96,11 @@ namespace kernel_selector {
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = FORCE_PRIORITY_9;
return{ kd };
}
-}
\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
index 471b61e6d..61edddabf 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
@@ -57,7 +57,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 2);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 2);
if (orgParams.use_momentum)
{
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp
index 4b9190f18..51b1122d4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp
@@ -24,7 +24,6 @@ namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp
new file mode 100644
index 000000000..09b3a0151
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp
@@ -0,0 +1,177 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "select_kernel_base.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+
+ bool SelectKernelBase::Validate(const Params& p, const optional_params& o) const
+ {
+ if (p.GetType() != KernelType::SELECT ||
+ o.GetType() != KernelType::SELECT)
+ {
+ return false;
+ }
+
+ const select_params& params = static_cast<const select_params&>(p);
+
+ if (params.inputs.size() != 3)
+ {
+ return false;
+ }
+
+ if (params.inputs[0].GetDType() != params.inputs[1].GetDType())
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ JitConstants SelectKernelBase::GetJitConstantsCommon(const select_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ std::string inputs_decls;
+
+ for (size_t i = 0; i < params.inputs.size(); i++)
+ {
+ std::string const_str = "const";
+
+ inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
+ }
+
+ jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
+
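+ // MASK is the jit-substituted expression that the generated OpenCL
+ // kernel evaluates on the third (condition) input; the cases below
+ // pick a form matching the element width of the data inputs.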
+ std::string destType, absType;
+
+ // i8, i8, i8
+ // i8, i8, u8
+ // u8, u8, i8
+ // u8, u8, u8
+ if ((params.inputs[2].GetDType() == Datatype::INT8
+ || params.inputs[2].GetDType() == Datatype::UINT8)
+ && (params.inputs[0].GetDType() == Datatype::INT8
+ || params.inputs[0].GetDType() == Datatype::UINT8))
+ {
+ jit.AddConstant(MakeJitConstant("MASK", "INPUT_2"));
+ }
+ else
+ {
+ // x, x, f32
+ // x, x, f16
+ if (params.inputs[2].GetDType() == Datatype::F32
+ || params.inputs[2].GetDType() == Datatype::F16)
+ {
+ absType = "fabs";
+ }
+ // f32, f32, i8
+ // f32, f32, u8
+ // f16, f16, i8
+ // f16, f16, u8
+ else
+ {
+ absType = "abs";
+ }
+
+ // f32, f32, x
+ if (params.inputs[0].GetDType() == Datatype::F32) {
+ destType = "int";
+ }
+ // f16, f16, x
+ else if (params.inputs[0].GetDType() == Datatype::F16) {
+ destType = "short";
+ }
+ // i8, i8, f32
+ // i8, i8, f16
+ // u8, u8, f32
+ // u8, u8, f16
+ else
+ {
+ destType = "char";
+ }
+
+ jit.AddConstant(MakeJitConstant("MASK", "convert_" + destType + "_rtp(" + absType + "(INPUT_2))"));
+ }
+
+ return jit;
+ }
+
+ JitConstants SelectKernelBase::GetJitConstants(const select_params& params) const
+ {
+ return GetJitConstantsCommon(params);
+ }
+
+ SelectKernelBase::DispatchData SelectKernelBase::SetDefault(const select_params& params) const
+ {
+ DispatchData kd;
+
+ const auto& out = params.output;
+
+ std::vector<size_t> gws;
+ for (const auto& o : out.GetDims())
+ {
+ gws.push_back(o.v);
+ }
+
+ for (size_t i = gws.size(); i < 4; i++)
+ {
+ gws.push_back(1U);
+ }
+
+ kd.gws0 = gws[0];
+ kd.gws1 = gws[1];
+ kd.gws2 = gws[2] * gws[3];
+
+ auto local = GetOptimalLocalWorkGroupSizes( { kd.gws0, kd.gws1, kd.gws2 } );
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const
+ {
+ if (!Validate(params, options))
+ {
+ return{};
+ }
+
+ KernelData kd = KernelData::Default<select_params>(params);
+ select_params& newParams = *static_cast<select_params*>(kd.params.get());
+
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ DispatchData runInfo = SetDefault(newParams);
+
+ auto& kernel = kd.kernels[0];
+
+ kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
+ kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
+
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+ kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
+
+ kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+ return{ kd };
+ }
+}
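
A standalone sketch of the MASK-expression dispatch implemented in
GetJitConstantsCommon above; the enum and the function name
select_mask_expr are illustrative, not part of the commit:

    #include <string>

    enum class Datatype { F16, F32, INT8, UINT8 };

    static bool IsByteType(Datatype t)
    {
        return t == Datatype::INT8 || t == Datatype::UINT8;
    }

    // data: type of inputs 0/1; mask: type of input 2 (the condition).
    std::string select_mask_expr(Datatype data, Datatype mask)
    {
        // Byte-sized data with a byte-sized mask can use the condition
        // input directly.
        if (IsByteType(mask) && IsByteType(data))
            return "INPUT_2";

        // Floating-point masks take fabs; integer masks take abs.
        const std::string absType =
            (mask == Datatype::F32 || mask == Datatype::F16) ? "fabs" : "abs";

        // The convert_<type>_rtp target matches the width of the data
        // inputs: int for f32, short for f16, char otherwise.
        std::string destType;
        if (data == Datatype::F32)      destType = "int";
        else if (data == Datatype::F16) destType = "short";
        else                            destType = "char";

        return "convert_" + destType + "_rtp(" + absType + "(INPUT_2))";
    }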
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h
new file mode 100644
index 000000000..c1d48d991
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h
@@ -0,0 +1,62 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // select_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct select_params : public base_params
+ {
+ select_params() : base_params(KernelType::SELECT) {}
+
+ virtual ParamsKey GetParamsKey() const
+ {
+ return base_params::GetParamsKey();
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // select_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct select_optional_params : optional_params
+ {
+ select_optional_params() : optional_params(KernelType::SELECT) {}
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // SelectKernelBase
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class SelectKernelBase : public common_kernel_base
+ {
+ public:
+ using common_kernel_base::common_kernel_base;
+ virtual ~SelectKernelBase() {}
+
+ using DispatchData = CommonDispatchData;
+ JitConstants GetJitConstantsCommon(const select_params& params) const;
+
+ protected:
+ virtual bool Validate(const Params& p, const optional_params& o) const override;
+ virtual JitConstants GetJitConstants(const select_params& params) const;
+ virtual DispatchData SetDefault(const select_params& params) const;
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp
new file mode 100644
index 000000000..f7f776c15
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp
@@ -0,0 +1,64 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "select_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+ ParamsKey SelectKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::yxfb);
+ k.EnableInputLayout(DataLayout::byxf);
+
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::yxfb);
+ k.EnableOutputLayout(DataLayout::byxf);
+
+ k.EnableBatching();
+ k.EnableDifferentTypes();
+
+ return k;
+ }
+
+ bool SelectKernelRef::Validate(const Params& p, const optional_params& o) const
+ {
+ if (!SelectKernelBase::Validate(p, o))
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ KernelsData SelectKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ return GetCommonKernelsData(params, options);
+ }
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h
new file mode 100644
index 000000000..a72c0e90a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "select_kernel_base.h"
+
+namespace kernel_selector
+{
+ class SelectKernelRef : public SelectKernelBase
+ {
+ public:
+ SelectKernelRef() : SelectKernelBase("select_gpu_ref") {}
+ virtual ~SelectKernelRef() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ protected:
+ bool Validate(const Params& p, const optional_params& o) const override;
+
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp
new file mode 100644
index 000000000..ec1218166
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "select_kernel_selector.h"
+#include "select_kernel_ref.h"
+
+namespace kernel_selector
+{
+ select_kernel_selector::select_kernel_selector()
+ {
+ Attach<SelectKernelRef>();
+ }
+
+ KernelsData select_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::SELECT);
+ }
+}
\ No newline at end of file
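
An illustrative usage sketch (not part of the commit) of the new
selector; it assumes select_params and select_optional_params are
populated the way other clDNN primitives fill their base_params:

    #include "select_kernel_selector.h"

    kernel_selector::KernelsData get_select_kernels(
        const kernel_selector::select_params& params,
        const kernel_selector::select_optional_params& options)
    {
        // Instance() returns the process-wide singleton; GetBestKernels
        // walks the attached implementations (currently only
        // SelectKernelRef) and keeps those whose ParamsKey matches.
        auto& selector = kernel_selector::select_kernel_selector::Instance();
        return selector.GetBestKernels(params, options);
    }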
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h
new file mode 100644
index 000000000..b3de11649
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+ class select_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static select_kernel_selector &Instance() {
+ static select_kernel_selector instance_;
+ return instance_;
+ }
+
+ select_kernel_selector();
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
index 4f02da734..4d2c36d39 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
@@ -88,7 +88,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp
index 0a4473714..da816abac 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp
@@ -74,7 +74,7 @@ namespace kernel_selector
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp
new file mode 100644
index 000000000..37c206d3b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp
@@ -0,0 +1,153 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "tile_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+ static int32_t GetTileChannelIndex(const tile_params& params)
+ {
+ Tensor::DataChannelName name = Tensor::DataChannelName::X;
+ switch (params.axis)
+ {
+ case TileAxis::X: name = Tensor::DataChannelName::X; break;
+ case TileAxis::Y: name = Tensor::DataChannelName::Y; break;
+ case TileAxis::FEATURE: name = Tensor::DataChannelName::FEATURE; break;
+ case TileAxis::BATCH: name = Tensor::DataChannelName::BATCH; break;
+ default: break;
+ }
+
+ return DataTensor::Channelndex(params.output.GetLayout(), name);
+ }
+
+ ParamsKey TileKernelRef::GetSupportedKey() const
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ return k;
+ }
+
+ CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const optional_params&) const
+ {
+ CommonDispatchData runInfo;
+
+ auto in = params.inputs[0];
+
+ size_t inner_size = 1;
+ size_t outer_size = 1;
+
+ const int32_t axis = GetTileChannelIndex(params);
+
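+ // Dims up to and including the tiled axis form the contiguous block
+ // that gets replicated; dims above the axis count how many such
+ // blocks the input contains.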
+ for (int32_t i = 0; i <= axis; i++)
+ {
+ inner_size *= in.GetDims()[i].v;
+ }
+
+ for (int32_t i = axis+1; i < static_cast<int32_t>(in.GetDims().size()); i++)
+ {
+ outer_size *= in.GetDims()[i].v;
+ }
+
+ if (inner_size > 1)
+ {
+ runInfo.gws0 = outer_size;
+ runInfo.gws1 = inner_size;
+ runInfo.gws2 = 1;
+
+ runInfo.lws0 = 1;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = 1;
+ }
+ else
+ {
+ runInfo.gws0 = Align(outer_size, 16);
+ runInfo.gws1 = 1;
+ runInfo.gws2 = 1;
+
+ runInfo.lws0 = 16;
+ runInfo.lws1 = 1;
+ runInfo.lws2 = 1;
+ }
+
+ runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+ return runInfo;
+ }
+
+ JitConstants TileKernelRef::GetJitConstants(const tile_params& params) const
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+ auto in = params.inputs[0];
+ auto out = params.output;
+
+ size_t inner_size = 1;
+ size_t outer_size = 1;
+ size_t axis_pitch = 1;
+
+ const int32_t axis = GetTileChannelIndex(params);
+
+ for (int32_t i = 0; i <= axis; i++)
+ {
+ inner_size *= in.GetDims()[i].v;
+ axis_pitch *= in.GetDims()[i].LogicalDimPadded();
+ }
+ for (int32_t i = axis+1; i < static_cast<int32_t>(in.GetDims().size()); i++)
+ {
+ outer_size *= in.GetDims()[i].v;
+ }
+
+ jit.AddConstant(MakeJitConstant("TILES", params.tiles));
+ jit.AddConstant(MakeJitConstant("AXIS_PITCH", axis_pitch));
+ jit.AddConstant(MakeJitConstant("OUTER_SIZE", outer_size));
+ if (inner_size == 1)
+ {
+ jit.AddConstant(MakeJitConstant("OUTPUT_SIZE", out.LogicalSize()));
+ jit.AddConstant(MakeJitConstant("DENSE", 1));
+ }
+ return jit;
+ }
+
+ KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+ {
+ assert(params.GetType() == KernelType::TILE);
+
+ KernelData kd = KernelData::Default<tile_params>(params);
+ tile_params& newParams = *static_cast<tile_params*>(kd.params.get());
+
+ auto runInfo = SetDefault(newParams, options);
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = kd.kernels[0];
+
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+
+ kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+ return{ kd };
+ }
+}
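
A worked example of the inner/outer split computed by SetDefault and
GetJitConstants above; the helper split_sizes and the concrete shape
are illustrative only:

    #include <cstddef>
    #include <vector>

    // dims are given lowest-stride first ({x, y, f, b} for bfyx),
    // matching the order GetDims() returns.
    static void split_sizes(const std::vector<size_t>& dims, int axis,
                            size_t& inner, size_t& outer)
    {
        inner = outer = 1;
        for (int i = 0; i <= axis; i++)
            inner *= dims[i];
        for (int i = axis + 1; i < static_cast<int>(dims.size()); i++)
            outer *= dims[i];
    }

    // For a bfyx input with b=2, f=3, y=4, x=5 tiled along FEATURE
    // (channel index 2 in bfyx), split_sizes({5, 4, 3, 2}, 2, ...)
    // yields inner = 60 and outer = 2, so SetDefault dispatches
    // gws = {2, 60, 1}; the DENSE fast path is taken only when the
    // tiled axis and every dim below it collapse to a single element.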
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
new file mode 100644
index 000000000..967dab817
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
@@ -0,0 +1,58 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // tile_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct tile_params : public base_params
+ {
+ tile_params() : base_params(KernelType::TILE) {}
+
+ TileAxis axis;
+ int tiles;
+
+ virtual ParamsKey GetParamsKey() const
+ {
+ return base_params::GetParamsKey();
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // tile_optional_params
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ struct tile_optional_params : optional_params
+ {
+ tile_optional_params() : optional_params(KernelType::TILE) {}
+ };
+
+ class TileKernelRef : public common_kernel_base
+ {
+ public:
+ TileKernelRef() : common_kernel_base("tile_ref") {}
+ virtual ~TileKernelRef() {}
+
+ virtual JitConstants GetJitConstants(const tile_params& params) const;
+ virtual CommonDispatchData SetDefault(const tile_params& params, const optional_params&) const;
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ virtual ParamsKey GetSupportedKey() const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp
new file mode 100644
index 000000000..c0ca49de7
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "tile_kernel_selector.h"
+#include "tile_kernel_ref.h"
+
+namespace kernel_selector {
+
+ tile_kernel_selector::tile_kernel_selector()
+ {
+ Attach<TileKernelRef>();
+ }
+
+ KernelsData tile_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+ {
+ return GetNaiveBestKernel(params, options, KernelType::TILE);
+ }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h
new file mode 100644
index 000000000..c0b10fa10
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+ class tile_kernel_selector : public kernel_selector_base
+ {
+ public:
+ static tile_kernel_selector &Instance() {
+ static tile_kernel_selector instance_;
+ return instance_;
+ }
+
+ tile_kernel_selector();
+
+ virtual ~tile_kernel_selector() {}
+
+ virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+ };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
index ea0d89515..889daf8bc 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
@@ -76,7 +76,7 @@ namespace kernel_selector
kernel.workGroups.global = { out.X().v, out.Y().v, out.Feature().v * out.Batch().v };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp
index 46a927483..9037ebc0a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp
@@ -15,15 +15,16 @@
*/
#include "upsampling_kernel_ref.h"
-#include "kernel_selector_utils.h"
namespace kernel_selector {
ParamsKey UpSamplingKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();