summary | refs | log | tree | commit | diff
path: root/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise
diff options
context:
space:
mode:
Diffstat (limited to 'inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise')
-rw-r--r-- inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp | 110
-rw-r--r-- inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h | 41
-rw-r--r-- inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp | 222
-rw-r--r-- inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h | 35
-rw-r--r-- inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp | 31
-rw-r--r-- inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h | 3
-rw-r--r-- inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp | 4
-rw-r--r-- inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp | 19
8 files changed, 394 insertions, 71 deletions
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
index 613fbb4f8..5feac0ca5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
@@ -41,6 +41,22 @@ namespace kernel_selector
}
}
+ ParamsKey eltwise_params::GetParamsKey() const // Returns the base_params key extended with this object's int8-quantization and output-calibration flags.
+ {
+ ParamsKey k = base_params::GetParamsKey(); // start from the key built by the base class
+ if (int8_quantization) // member flag of eltwise_params
+ {
+ k.EnableInt8Quantization();
+ }
+
+ if (output_calibration) // member flag of eltwise_params; checked independently of int8_quantization
+ {
+ k.EnableOutputCalibration();
+ }
+
+ return k;
+ }
+
bool EltwiseKernelBase::Validate(const Params& p, const optional_params& o) const
{
if (p.GetType() != KernelType::ELTWISE ||
@@ -56,7 +72,7 @@ namespace kernel_selector
return false;
}
- auto& operations = params.eltwiseParams.operations;
+ auto& operations = params.operations;
if (operations.size() == 0)
{
@@ -91,24 +107,24 @@ namespace kernel_selector
JitConstants jit = MakeBaseParamsJitConstants(params);
jit.AddConstants({
- MakeJitConstant("ELTWISE_LAYOUT_BASED", params.eltwiseParams.layoutBased),
- MakeJitConstant("QUANTIZATION_TERM", params.eltwiseParams.int8_quantization),
+ MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
+ MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
});
- if (params.eltwiseParams.int8_quantization)
+ if (params.int8_quantization)
{
- if (params.eltwiseParams.output_calibration)
+ if (params.output_calibration)
{
- jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.eltwiseParams.output_calibration));
+ jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
}
else
- jit.AddConstants({ MakeJitConstant("O_QF", params.eltwiseParams.output_quantization_factor) });
+ jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) });
}
std::string inputs_decls, vload_decls;
- auto& updateInputs = params.eltwiseParams.updateInputIds;
+ auto& updateInputs = params.updateInputIds;
for (size_t i = 0; i < params.inputs.size(); i++)
{
@@ -143,8 +159,8 @@ namespace kernel_selector
std::string do_eltwise;
- auto& operations = params.eltwiseParams.operations;
- auto& coefficients = params.eltwiseParams.coefficients;
+ auto& operations = params.operations;
+ auto& coefficients = params.coefficients;
for (size_t op_num = 0; op_num < operations.size(); op_num++)
{
@@ -187,7 +203,7 @@ namespace kernel_selector
cast_type = "(MAKE_VECTOR_TYPE(UNIT_TYPE, 8))";
op = "const MAKE_VECTOR_TYPE(UNIT_TYPE, 8) tmp" + op_num_str + " = ";
}
- else if(params.eltwiseParams.int8_quantization)
+ else if(params.int8_quantization)
{
cast_type = "(int)";
op = "const int tmp" + op_num_str + " = ";
@@ -251,7 +267,7 @@ namespace kernel_selector
jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
- if (params.eltwiseParams.layoutBased || params.eltwiseParams.int8_quantization)
+ if (params.layoutBased || params.int8_quantization)
{
jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
}
@@ -264,32 +280,27 @@ namespace kernel_selector
return GetJitConstantsCommon(params, false);
}
- KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const
+ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_params& params) const
{
- if (!Validate(params, options))
- {
- return{};
- }
-
- KernelData kd = KernelData::Default<eltwise_params>(params);
- eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
+ DispatchData kd;
- auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
- auto cldnn_jit = GetJitConstants(newParams);
- std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
-
- const auto& out = newParams.output;
- auto& kernel = kd.kernels[0];
- if (newParams.eltwiseParams.layoutBased || newParams.eltwiseParams.int8_quantization)
+ if (params.layoutBased || params.int8_quantization)
{
- kernel.workGroups.global = GetTensorFriendlyWorkGroups(newParams.inputs[0]);
+ auto global = GetTensorFriendlyWorkGroups(params.inputs[0]);
+ kd.gws0 = global[0];
+ kd.gws1 = global[1];
+ kd.gws2 = global[2];
}
- else if (CheckInputsOutputNoPitchSameDims(newParams))
+ else if (CheckInputsOutputNoPitchSameDims(params))
{
- kernel.workGroups.global = { newParams.inputs[0].LogicalSize(), 1, 1 };
+ kd.gws0 = params.inputs[0].LogicalSize();
+ kd.gws1 = 1;
+ kd.gws2 = 1;
}
else
{
+ const auto& out = params.output;
+
std::vector<size_t> gws;
for (const auto& o : out.GetDims())
{
@@ -301,11 +312,42 @@ namespace kernel_selector
gws.push_back(1U);
}
- kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
+ kd.gws0 = gws[0];
+ kd.gws1 = gws[1];
+ kd.gws2 = gws[2] * gws[3];
}
- kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
- kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.eltwiseParams.int8_quantization, newParams.eltwiseParams.output_calibration);
+
+ auto local = GetOptimalLocalWorkGroupSizes( { kd.gws0, kd.gws1, kd.gws2 } );
+ kd.lws0 = local[0];
+ kd.lws1 = local[1];
+ kd.lws2 = local[2];
+
+ return kd;
+ }
+
+ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const
+ {
+ if (!Validate(params, options))
+ {
+ return{};
+ }
+
+ KernelData kd = KernelData::Default<eltwise_params>(params);
+ eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
+
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ DispatchData runInfo = SetDefault(newParams);
+
+ auto& kernel = kd.kernels[0];
+
+ kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
+ kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
+
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+ kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.int8_quantization, newParams.output_calibration);
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
index 9d7127822..161140849 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
@@ -25,7 +25,7 @@ namespace kernel_selector
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct eltwise_params : public base_params
{
- eltwise_params() : base_params(KernelType::ELTWISE), eltwiseParams() {}
+ eltwise_params() : base_params(KernelType::ELTWISE) {}
struct InputType
{
@@ -87,35 +87,17 @@ namespace kernel_selector
uint32_t tmpId;
};
- struct DedicatedParams
- {
- std::vector<eltwise_params::Node> operations;
- std::vector<float> coefficients;
- std::vector<UpdateInputData> updateInputIds;
- bool layoutBased = false;
- bool int8_quantization = false;
- bool output_calibration = false;
- float output_quantization_factor = 1.0f;
- };
-
- DedicatedParams eltwiseParams;
+ std::vector<eltwise_params::Node> operations;
+ std::vector<float> coefficients;
+ std::vector<UpdateInputData> updateInputIds;
+
+ bool layoutBased = false;
+ bool int8_quantization = false;
+ bool output_calibration = false;
+ float output_quantization_factor = 1.0f;
+
MultiDataTensor output_calibration_factors;
-
- virtual ParamsKey GetParamsKey() const
- {
- ParamsKey k = base_params::GetParamsKey();
- if (eltwiseParams.int8_quantization)
- {
- k.EnableInt8Quantization();
- }
-
- if (eltwiseParams.output_calibration)
- {
- k.EnableOutputCalibration();
- }
-
- return k;
- }
+ virtual ParamsKey GetParamsKey() const;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -141,6 +123,7 @@ namespace kernel_selector
protected:
virtual bool Validate(const Params& p, const optional_params& o) const override;
virtual JitConstants GetJitConstants(const eltwise_params& params) const;
+ virtual DispatchData SetDefault(const eltwise_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const;
};
}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
new file mode 100644
index 000000000..571a013ce
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
@@ -0,0 +1,222 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+ ParamsKey EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const // Declares the parameter combinations this kernel accepts.
+ {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::INT8); // INT8 is the only input data type enabled
+ k.EnableOutputDataType(Datatype::INT8); // INT8 is the only output data type enabled
+ k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); // only this layout is enabled for inputs
+ k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); // only this layout is enabled for the output
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ k.EnableInt8Quantization(); // matches the flag eltwise_params::GetParamsKey sets for int8_quantization
+ k.EnableOutputCalibration(); // matches the flag eltwise_params::GetParamsKey sets for output_calibration
+ return k;
+ }
+
+ EltwiseKernelBase::DispatchData EltwiseKernel_fs_bs_yx_bsv4_fsv32::SetDefault(const eltwise_params& params) const // Computes global/local work sizes for this kernel from the output tensor dimensions.
+ {
+ DispatchData kd;
+
+ kd.gws0 = params.output.X().v; // one global work item per output X position
+ kd.gws1 = params.output.Y().v; // one global work item per output Y position
+ // we process 4 batches and 4 features per workitem
+ kd.gws2 = (params.output.Batch().v / 4) * (params.output.Feature().v / 4); // NOTE(review): integer division truncates; a batch or feature count that is not a multiple of 4 loses its remainder here, while GetJitConstants rounds batch up with (Batch + 3) / 4 - confirm callers guarantee multiples of 4
+ kd.lws0 = 1;
+ kd.lws1 = 1;
+ kd.lws2 = 8; // NOTE(review): fixed local size; presumably gws2 must be a multiple of 8 for a valid dispatch - nothing in this function enforces it, confirm
+
+ return kd;
+ }
+
+ JitConstants EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetJitConstants(const eltwise_params& params) const // Builds the JIT constants for this kernel: input pitches/offset, quantization terms, input declarations and the generated DO_ELTWISE statement list.
+ {
+ JitConstants jit = MakeBaseParamsJitConstants(params); // start from the constants shared by all base_params kernels
+
+ const size_t in_x_pitch = 32 * 4; // presumably one fsv32 x bsv4 block per X step - TODO confirm against the kernel source
+ const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded(); // all pitches are derived from inputs[0] only
+ const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+ const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4); // batch count rounded up to whole groups of 4
+ const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before; // skips the leading X and Y padding of inputs[0]
+
+ jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+ jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+ jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+ jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+ ///////////////
+ jit.AddConstants({
+ MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
+ MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization),
+ });
+
+ if (params.int8_quantization) // O_QF is only emitted when quantization is on
+ {
+ if (params.output_calibration)
+ {
+ jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
+ jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0])); // O_QF comes from the first calibration-factors tensor; NOTE(review): assumes output_calibration_factors is non-empty - confirm
+
+ }
+ else // no calibration tensor: O_QF is the scalar quantization factor
+ jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) });
+ }
+
+ std::string inputs_decls; // accumulates the kernel argument declarations, one per input
+ auto& updateInputs = params.updateInputIds;
+
+ for (size_t i = 0; i < params.inputs.size(); i++)
+ {
+ //const should be added only to inputs which will not be updated
+ std::string const_str = "const";
+ for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
+ {
+ if (updateInputs[update_input_idx].inputId == i) // input i is written back below, so it cannot be const
+ {
+ const_str = "";
+ break;
+ }
+ }
+
+ inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", "; // each declaration ends with ", " including the last one
+ }
+
+ jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
+ jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));
+
+ std::string do_eltwise; // accumulates the body of the DO_ELTWISE constant
+
+ auto& operations = params.operations;
+ auto& coefficients = params.coefficients;
+
+ for (size_t op_num = 0; op_num < operations.size(); op_num++) // one OPERATION<n> constant and one tmp<n> result per operation
+ {
+ const std::string op_num_str = std::to_string(op_num);
+ const auto& ew = operations[op_num];
+
+ for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) // emit INPUT_<op>_<idx> for every operand of this operation
+ {
+ const auto& input = ew.inputs[input_idx];
+ const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);
+ switch (input.mode)
+ {
+ case EltwiseInputMode::SCALAR: // operand is the literal scalar value
+ jit.AddConstant(MakeJitConstant(name, input.scalar));
+ break;
+ case EltwiseInputMode::INPUT_BUFFER: // operand is read from input<index> through the GET_INPUT macro
+ jit.AddConstant(MakeJitConstant(name, "GET_INPUT(input" + std::to_string(input.index) + ", INPUT" + std::to_string(input.index) + ")"));
+ break;
+ case EltwiseInputMode::OUTPUT_BUFFER: // operand is read from the output buffer
+ jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
+ break;
+ case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER: // operand is input<index> indexed by an earlier tmp<tmpIndex> result
+ jit.AddConstant(MakeJitConstant(name, "input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
+ break;
+ case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX: // operand is an earlier tmp<tmpIndex> result
+ jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
+ break;
+ default: // unknown modes emit no INPUT_ constant
+ break;
+ }
+ }
+ std::string input0_str, input1_str, cast_type, op;
+
+ if (params.int8_quantization)
+ {
+ cast_type = "(int16)"; // presumably the OpenCL 16-component int vector type, not a 16-bit integer - TODO confirm against the kernel source
+ op = "const int16 tmp" + op_num_str + " = ";
+ }
+ else
+ {
+ cast_type = "(UNIT_TYPE)";
+ op = "const UNIT_TYPE tmp" + op_num_str + " = ";
+ }
+
+ input0_str = cast_type + "INPUT_" + op_num_str + "_0";
+ input1_str = cast_type + "INPUT_" + op_num_str + "_1"; // built unconditionally; single-operand modes below simply do not use it
+
+ if (ew.mode == EltwiseMode::ADD) // per-input coefficients are applied for ADD only
+ {
+ std::vector<std::string> coeff_strings(ew.inputs.size(), "");
+ for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++)
+ {
+ const auto& input = ew.inputs[input_idx];
+ if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size()) // a coefficient is looked up by buffer index, only for buffer operands
+ {
+ const float c = coefficients[input.index];
+ if (c != 1.0f) // a coefficient of exactly 1 emits no multiplication
+ coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
+ }
+ }
+
+ input0_str = coeff_strings[0] + input0_str; // NOTE(review): indexes 0 and 1 assume an ADD node always has at least two inputs; coeff_strings has ew.inputs.size() elements - confirm this is validated upstream
+ input1_str = coeff_strings[1] + input1_str;
+ }
+
+
+ switch (ew.mode) // append the expression for this operation to the "const <type> tmp<n> = " prefix
+ {
+ case EltwiseMode::ADD: op += input0_str + " + " + input1_str; break;
+ case EltwiseMode::SUB: op += input0_str + " - " + input1_str; break;
+ case EltwiseMode::MUL: op += input0_str + " * " + input1_str; break;
+ case EltwiseMode::DIV: op += input0_str + " / " + input1_str; break;
+ case EltwiseMode::MODULU: op += cast_type + "fmod(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::MIN: op += cast_type + "fmin(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::MAX: op += cast_type + "fmax(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::POW: op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break;
+ case EltwiseMode::SQRT: op += cast_type + "sqrt(" + input0_str + ")"; break;
+ case EltwiseMode::RSQRT: op += cast_type + "1/sqrt(" + input0_str + ")"; break; // the cast binds to the literal 1, not to the whole quotient
+ case EltwiseMode::ASSIGN: op += input0_str; break;
+ default: // unknown modes leave the declaration without a right-hand side
+ break;
+ }
+
+ std::string opname = "OPERATION" + op_num_str;
+ jit.AddConstant(MakeJitConstant(opname, op));
+ do_eltwise += "\\\n\t" + opname + ";"; // each entry is prefixed with backslash, newline, tab - presumably so DO_ELTWISE expands as a line-continued macro body, confirm
+ }
+
+ for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) // write selected tmp results back into the inputs that were declared non-const above
+ do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) +
+ "[GET_INDEX(INPUT, " + std::to_string(updateInputs[update_input_idx].inputId) +
+ ")] = tmp" + std::to_string(updateInputs[update_input_idx].tmpId) + ";";
+
+ do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";"; // result is the last operation's tmp; NOTE(review): size() - 1 wraps if operations is empty - the base Validate appears to reject that case, confirm
+
+ jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
+
+ if (params.layoutBased || params.int8_quantization) // NOTE(review): merges the tensor-friendly work-group constants although this class's SetDefault derives its sizes from the output dims directly - confirm the kernel uses them
+ {
+ jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
+ }
+
+ ///////////////
+ return jit;
+ }
+
+ KernelsData EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const // Entry point for kernel selection; adds nothing of its own.
+ {
+ return GetCommonKernelsData(params, options); // delegates to the shared EltwiseKernelBase implementation
+ }
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
new file mode 100644
index 000000000..b1fb3e950
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "eltwise_kernel_base.h"
+
+namespace kernel_selector
+{
+ class EltwiseKernel_fs_bs_yx_bsv4_fsv32 : public EltwiseKernelBase // Eltwise kernel variant for the fs_bs_yx_bsv4_fsv32 data layout.
+ {
+ public:
+ EltwiseKernel_fs_bs_yx_bsv4_fsv32() : EltwiseKernelBase("eltwise_fs_bs_yx_bsv4_fsv32") {} // passes the kernel name to the base class
+ virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {}
+
+ virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; // kernel-selection entry point
+ virtual ParamsKey GetSupportedKey() const override; // parameter combinations this kernel accepts
+ protected:
+ JitConstants GetJitConstants(const eltwise_params& params) const override; // layout-specific JIT constants, replacing the base implementation
+ virtual DispatchData SetDefault(const eltwise_params& params) const override; // layout-specific work sizes, replacing the base implementation
+ };
+} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
index 3840f463e..3a7776575 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
@@ -22,8 +22,16 @@ namespace kernel_selector {
ParamsKey EltwiseKernelRef::GetSupportedKey() const
{
ParamsKey k;
- k.EnableAllInputDataType();
- k.EnableAllOutputDataType();
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::INT32);
+ k.EnableInputDataType(Datatype::INT64);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::INT32);
+ k.EnableOutputDataType(Datatype::INT64);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
@@ -35,6 +43,25 @@ namespace kernel_selector {
return k;
}
+ bool EltwiseKernelRef::Validate(const Params& p, const optional_params& o) const // Base validation plus rejection of any tensor in the fs_bs_yx_bsv4_fsv32 layout.
+ {
+ if (!EltwiseKernelBase::Validate(p, o)) // run the shared checks first
+ {
+ return false;
+ }
+
+ const eltwise_params& params = static_cast<const eltwise_params&>(p); // unchecked downcast; presumably safe because the base Validate checks the kernel type - confirm
+ for (size_t i = 0; i < params.inputs.size(); i++)
+ {
+ if (params.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) // a single input in this layout rejects the whole params set
+ return false;
+ }
+ if (params.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) // the output is checked the same way as the inputs
+ return false;
+
+ return true;
+ }
+
KernelsData EltwiseKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
{
return GetCommonKernelsData(params, options);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
index 2249dc8c9..c2ccf054d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
@@ -28,5 +28,8 @@ namespace kernel_selector
virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
virtual ParamsKey GetSupportedKey() const override;
+ protected:
+ bool Validate(const Params& p, const optional_params& o) const override;
+
};
} \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
index d71deddfb..cf7565216 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
@@ -17,13 +17,15 @@
#include "eltwise_kernel_selector.h"
#include "eltwise_kernel_ref.h"
#include "eltwise_kernel_vload8.h"
-
+#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
+
namespace kernel_selector
{
eltwise_kernel_selector::eltwise_kernel_selector()
{
Attach<EltwiseKernelRef>();
Attach<EltwiseKernel_vload8>();
+ Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>(); // attaches the new layout-specific kernel after the ref and vload8 kernels
}
KernelsData eltwise_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
index 934bc44cd..5ceb75084 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
@@ -45,6 +45,15 @@ namespace kernel_selector {
}
const auto& ewParams = static_cast<const eltwise_params&>(params);
+
+ for (size_t i = 0; i < ewParams.inputs.size(); i++)
+ {
+ if (ewParams.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+ }
+ if (ewParams.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+ return false;
+
const auto& output = ewParams.output;
const auto count = output.PhysicalSize();
@@ -62,16 +71,16 @@ namespace kernel_selector {
//TODO: add support to this implementation when user requests input values updates
bool bCheckUpdateInput = true;
- if (!ewParams.eltwiseParams.updateInputIds.empty())
+ if (!ewParams.updateInputIds.empty())
bCheckUpdateInput = false;
//TODO: add support for reading from output buffer and using its values in computation
bool bCheckUseOutput = true;
- for (size_t op = 0; op < ewParams.eltwiseParams.operations.size(); op++)
+ for (size_t op = 0; op < ewParams.operations.size(); op++)
{
- for (size_t input_idx = 0; input_idx < ewParams.eltwiseParams.operations[op].inputs.size(); input_idx++)
+ for (size_t input_idx = 0; input_idx < ewParams.operations[op].inputs.size(); input_idx++)
{
- if (ewParams.eltwiseParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER)
+ if (ewParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER)
{
bCheckUseOutput = false;
break;
@@ -114,7 +123,7 @@ namespace kernel_selector {
auto& kernel = kd.kernels[0];
kernel.workGroups.global = { std::max(newParams.inputs[0].LogicalSize()/8, (size_t)1), 1, 1 };
kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
- kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN);
+ kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
kd.estimatedTime = FORCE_PRIORITY_8;