diff options
Diffstat (limited to 'inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise')
8 files changed, 394 insertions, 71 deletions
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp index 613fbb4f8..5feac0ca5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp @@ -41,6 +41,22 @@ namespace kernel_selector } } + ParamsKey eltwise_params::GetParamsKey() const + { + ParamsKey k = base_params::GetParamsKey(); + if (int8_quantization) + { + k.EnableInt8Quantization(); + } + + if (output_calibration) + { + k.EnableOutputCalibration(); + } + + return k; + } + bool EltwiseKernelBase::Validate(const Params& p, const optional_params& o) const { if (p.GetType() != KernelType::ELTWISE || @@ -56,7 +72,7 @@ namespace kernel_selector return false; } - auto& operations = params.eltwiseParams.operations; + auto& operations = params.operations; if (operations.size() == 0) { @@ -91,24 +107,24 @@ namespace kernel_selector JitConstants jit = MakeBaseParamsJitConstants(params); jit.AddConstants({ - MakeJitConstant("ELTWISE_LAYOUT_BASED", params.eltwiseParams.layoutBased), - MakeJitConstant("QUANTIZATION_TERM", params.eltwiseParams.int8_quantization), + MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased), + MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization), }); - if (params.eltwiseParams.int8_quantization) + if (params.int8_quantization) { - if (params.eltwiseParams.output_calibration) + if (params.output_calibration) { - jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.eltwiseParams.output_calibration)); + jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration)); jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0])); } else - jit.AddConstants({ MakeJitConstant("O_QF", params.eltwiseParams.output_quantization_factor) }); + jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) }); } std::string inputs_decls, vload_decls; - auto& updateInputs = params.eltwiseParams.updateInputIds; + auto& updateInputs = params.updateInputIds; for (size_t i = 0; i < params.inputs.size(); i++) { @@ -143,8 +159,8 @@ namespace kernel_selector std::string do_eltwise; - auto& operations = params.eltwiseParams.operations; - auto& coefficients = params.eltwiseParams.coefficients; + auto& operations = params.operations; + auto& coefficients = params.coefficients; for (size_t op_num = 0; op_num < operations.size(); op_num++) { @@ -187,7 +203,7 @@ namespace kernel_selector cast_type = "(MAKE_VECTOR_TYPE(UNIT_TYPE, 8))"; op = "const MAKE_VECTOR_TYPE(UNIT_TYPE, 8) tmp" + op_num_str + " = "; } - else if(params.eltwiseParams.int8_quantization) + else if(params.int8_quantization) { cast_type = "(int)"; op = "const int tmp" + op_num_str + " = "; @@ -251,7 +267,7 @@ namespace kernel_selector jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise)); - if (params.eltwiseParams.layoutBased || params.eltwiseParams.int8_quantization) + if (params.layoutBased || params.int8_quantization) { jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0])); } @@ -264,32 +280,27 @@ namespace kernel_selector return GetJitConstantsCommon(params, false); } - KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const + EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_params& params) const { - if (!Validate(params, options)) - { - return{}; - } - - KernelData kd = KernelData::Default<eltwise_params>(params); - eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get()); + DispatchData kd; - auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); - auto cldnn_jit = GetJitConstants(newParams); - std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); - - const auto& out = newParams.output; - auto& kernel = kd.kernels[0]; - if (newParams.eltwiseParams.layoutBased || newParams.eltwiseParams.int8_quantization) + if (params.layoutBased || params.int8_quantization) { - kernel.workGroups.global = GetTensorFriendlyWorkGroups(newParams.inputs[0]); + auto global = GetTensorFriendlyWorkGroups(params.inputs[0]); + kd.gws0 = global[0]; + kd.gws1 = global[1]; + kd.gws2 = global[2]; } - else if (CheckInputsOutputNoPitchSameDims(newParams)) + else if (CheckInputsOutputNoPitchSameDims(params)) { - kernel.workGroups.global = { newParams.inputs[0].LogicalSize(), 1, 1 }; + kd.gws0 = params.inputs[0].LogicalSize(); + kd.gws1 = 1; + kd.gws2 = 1; } else { + const auto& out = params.output; + std::vector<size_t> gws; for (const auto& o : out.GetDims()) { @@ -301,11 +312,42 @@ namespace kernel_selector gws.push_back(1U); } - kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] }; + kd.gws0 = gws[0]; + kd.gws1 = gws[1]; + kd.gws2 = gws[2] * gws[3]; } - kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.eltwiseParams.int8_quantization, newParams.eltwiseParams.output_calibration); + + auto local = GetOptimalLocalWorkGroupSizes( { kd.gws0, kd.gws1, kd.gws2 } ); + kd.lws0 = local[0]; + kd.lws1 = local[1]; + kd.lws2 = local[2]; + + return kd; + } + + KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const + { + if (!Validate(params, options)) + { + return{}; + } + + KernelData kd = KernelData::Default<eltwise_params>(params); + eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get()); + + auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); + auto cldnn_jit = GetJitConstants(newParams); + std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); + + DispatchData runInfo = SetDefault(newParams); + + auto& kernel = kd.kernels[0]; + + kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 }; + kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 }; + + kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN); + kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.int8_quantization, newParams.output_calibration); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h index 9d7127822..161140849 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h @@ -25,7 +25,7 @@ namespace kernel_selector //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct eltwise_params : public base_params { - eltwise_params() : base_params(KernelType::ELTWISE), eltwiseParams() {} + eltwise_params() : base_params(KernelType::ELTWISE) {} struct InputType { @@ -87,35 +87,17 @@ namespace kernel_selector uint32_t tmpId; }; - struct DedicatedParams - { - std::vector<eltwise_params::Node> operations; - std::vector<float> coefficients; - std::vector<UpdateInputData> updateInputIds; - bool layoutBased = false; - bool int8_quantization = false; - bool output_calibration = false; - float output_quantization_factor = 1.0f; - }; - - DedicatedParams eltwiseParams; + std::vector<eltwise_params::Node> operations; + std::vector<float> coefficients; + std::vector<UpdateInputData> updateInputIds; + + bool layoutBased = false; + bool int8_quantization = false; + bool output_calibration = false; + float output_quantization_factor = 1.0f; + MultiDataTensor output_calibration_factors; - - virtual ParamsKey GetParamsKey() const - { - ParamsKey k = base_params::GetParamsKey(); - if (eltwiseParams.int8_quantization) - { - k.EnableInt8Quantization(); - } - - if (eltwiseParams.output_calibration) - { - k.EnableOutputCalibration(); - } - - return k; - } + virtual ParamsKey GetParamsKey() const; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -141,6 +123,7 @@ namespace kernel_selector protected: virtual bool Validate(const Params& p, const optional_params& o) const override; virtual JitConstants GetJitConstants(const eltwise_params& params) const; + virtual DispatchData SetDefault(const eltwise_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const; }; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp new file mode 100644 index 000000000..571a013ce --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp @@ -0,0 +1,222 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { + + ParamsKey EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT8); + k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableInt8Quantization(); + k.EnableOutputCalibration(); + return k; + } + + EltwiseKernelBase::DispatchData EltwiseKernel_fs_bs_yx_bsv4_fsv32::SetDefault(const eltwise_params& params) const + { + DispatchData kd; + + kd.gws0 = params.output.X().v; + kd.gws1 = params.output.Y().v; + // we process 4 batches and 4 features per workitem + kd.gws2 = (params.output.Batch().v / 4) * (params.output.Feature().v / 4); + kd.lws0 = 1; + kd.lws1 = 1; + kd.lws2 = 8; + + return kd; + } + + JitConstants EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetJitConstants(const eltwise_params& params) const + { + JitConstants jit = MakeBaseParamsJitConstants(params); + + const size_t in_x_pitch = 32 * 4; + const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded(); + const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded(); + const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4); + const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before; + + jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch)); + jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch)); + jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset)); + + /////////////// + jit.AddConstants({ + MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased), + MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization), + }); + + if (params.int8_quantization) + { + if (params.output_calibration) + { + jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration)); + jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0])); + + } + else + jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) }); + } + + std::string inputs_decls; + auto& updateInputs = params.updateInputIds; + + for (size_t i = 0; i < params.inputs.size(); i++) + { + //const should be added only to inputs which will not be updated + std::string const_str = "const"; + for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) + { + if (updateInputs[update_input_idx].inputId == i) + { + const_str = ""; + break; + } + } + + inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", "; + } + + jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls)); + jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params))); + + std::string do_eltwise; + + auto& operations = params.operations; + auto& coefficients = params.coefficients; + + for (size_t op_num = 0; op_num < operations.size(); op_num++) + { + const std::string op_num_str = std::to_string(op_num); + const auto& ew = operations[op_num]; + + for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) + { + const auto& input = ew.inputs[input_idx]; + const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx); + switch (input.mode) + { + case EltwiseInputMode::SCALAR: + jit.AddConstant(MakeJitConstant(name, input.scalar)); + break; + case EltwiseInputMode::INPUT_BUFFER: + jit.AddConstant(MakeJitConstant(name, "GET_INPUT(input" + std::to_string(input.index) + ", INPUT" + std::to_string(input.index) + ")")); + break; + case EltwiseInputMode::OUTPUT_BUFFER: + jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]")); + break; + case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER: + jit.AddConstant(MakeJitConstant(name, "input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]")); + break; + case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX: + jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex))); + break; + default: + break; + } + } + std::string input0_str, input1_str, cast_type, op; + + if (params.int8_quantization) + { + cast_type = "(int16)"; + op = "const int16 tmp" + op_num_str + " = "; + } + else + { + cast_type = "(UNIT_TYPE)"; + op = "const UNIT_TYPE tmp" + op_num_str + " = "; + } + + input0_str = cast_type + "INPUT_" + op_num_str + "_0"; + input1_str = cast_type + "INPUT_" + op_num_str + "_1"; + + if (ew.mode == EltwiseMode::ADD) + { + std::vector<std::string> coeff_strings(ew.inputs.size(), ""); + for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) + { + const auto& input = ew.inputs[input_idx]; + if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size()) + { + const float c = coefficients[input.index]; + if (c != 1.0f) + coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*"; + } + } + + input0_str = coeff_strings[0] + input0_str; + input1_str = coeff_strings[1] + input1_str; + } + + + switch (ew.mode) + { + case EltwiseMode::ADD: op += input0_str + " + " + input1_str; break; + case EltwiseMode::SUB: op += input0_str + " - " + input1_str; break; + case EltwiseMode::MUL: op += input0_str + " * " + input1_str; break; + case EltwiseMode::DIV: op += input0_str + " / " + input1_str; break; + case EltwiseMode::MODULU: op += cast_type + "fmod(" + input0_str + ", " + input1_str + ")"; break; + case EltwiseMode::MIN: op += cast_type + "fmin(" + input0_str + ", " + input1_str + ")"; break; + case EltwiseMode::MAX: op += cast_type + "fmax(" + input0_str + ", " + input1_str + ")"; break; + case EltwiseMode::POW: op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break; + case EltwiseMode::SQRT: op += cast_type + "sqrt(" + input0_str + ")"; break; + case EltwiseMode::RSQRT: op += cast_type + "1/sqrt(" + input0_str + ")"; break; + case EltwiseMode::ASSIGN: op += input0_str; break; + default: + break; + } + + std::string opname = "OPERATION" + op_num_str; + jit.AddConstant(MakeJitConstant(opname, op)); + do_eltwise += "\\\n\t" + opname + ";"; + } + + for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) + do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) + + "[GET_INDEX(INPUT, " + std::to_string(updateInputs[update_input_idx].inputId) + + ")] = tmp" + std::to_string(updateInputs[update_input_idx].tmpId) + ";"; + + do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";"; + + jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise)); + + if (params.layoutBased || params.int8_quantization) + { + jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0])); + } + + /////////////// + return jit; + } + + KernelsData EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options); + } +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h new file mode 100644 index 000000000..b1fb3e950 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h @@ -0,0 +1,35 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "eltwise_kernel_base.h" + +namespace kernel_selector +{ + class EltwiseKernel_fs_bs_yx_bsv4_fsv32 : public EltwiseKernelBase + { + public: + EltwiseKernel_fs_bs_yx_bsv4_fsv32() : EltwiseKernelBase("eltwise_fs_bs_yx_bsv4_fsv32") {} + virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {} + + virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + virtual ParamsKey GetSupportedKey() const override; + protected: + JitConstants GetJitConstants(const eltwise_params& params) const override; + virtual DispatchData SetDefault(const eltwise_params& params) const override; + }; +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp index 3840f463e..3a7776575 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp @@ -22,8 +22,16 @@ namespace kernel_selector { ParamsKey EltwiseKernelRef::GetSupportedKey() const { ParamsKey k; - k.EnableAllInputDataType(); - k.EnableAllOutputDataType(); + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::INT32); + k.EnableInputDataType(Datatype::INT64); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT64); k.EnableDifferentTypes(); k.EnableAllInputLayout(); k.EnableAllOutputLayout(); @@ -35,6 +43,25 @@ namespace kernel_selector { return k; } + bool EltwiseKernelRef::Validate(const Params& p, const optional_params& o) const + { + if (!EltwiseKernelBase::Validate(p, o)) + { + return false; + } + + const eltwise_params& params = static_cast<const eltwise_params&>(p); + for (size_t i = 0; i < params.inputs.size(); i++) + { + if (params.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) + return false; + } + if (params.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) + return false; + + return true; + } + KernelsData EltwiseKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { return GetCommonKernelsData(params, options); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h index 2249dc8c9..c2ccf054d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h @@ -28,5 +28,8 @@ namespace kernel_selector virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; virtual ParamsKey GetSupportedKey() const override; + protected: + bool Validate(const Params& p, const optional_params& o) const override; + }; }
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp index d71deddfb..cf7565216 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp @@ -17,13 +17,15 @@ #include "eltwise_kernel_selector.h" #include "eltwise_kernel_ref.h" #include "eltwise_kernel_vload8.h" - +#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h" + namespace kernel_selector { eltwise_kernel_selector::eltwise_kernel_selector() { Attach<EltwiseKernelRef>(); Attach<EltwiseKernel_vload8>(); + Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>(); } KernelsData eltwise_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp index 934bc44cd..5ceb75084 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp @@ -45,6 +45,15 @@ namespace kernel_selector { } const auto& ewParams = static_cast<const eltwise_params&>(params); + + for (size_t i = 0; i < ewParams.inputs.size(); i++) + { + if (ewParams.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) + return false; + } + if (ewParams.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) + return false; + const auto& output = ewParams.output; const auto count = output.PhysicalSize(); @@ -62,16 +71,16 @@ namespace kernel_selector { //TODO: add support to this implementation when user requests input values updates bool bCheckUpdateInput = true; - if (!ewParams.eltwiseParams.updateInputIds.empty()) + if (!ewParams.updateInputIds.empty()) bCheckUpdateInput = false; //TODO: add support for reading from output buffer and using its values in computation bool bCheckUseOutput = true; - for (size_t op = 0; op < ewParams.eltwiseParams.operations.size(); op++) + for (size_t op = 0; op < ewParams.operations.size(); op++) { - for (size_t input_idx = 0; input_idx < ewParams.eltwiseParams.operations[op].inputs.size(); input_idx++) + for (size_t input_idx = 0; input_idx < ewParams.operations[op].inputs.size(); input_idx++) { - if (ewParams.eltwiseParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER) + if (ewParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER) { bCheckUseOutput = false; break; @@ -114,7 +123,7 @@ namespace kernel_selector { auto& kernel = kd.kernels[0]; kernel.workGroups.global = { std::max(newParams.inputs[0].LogicalSize()/8, (size_t)1), 1, 1 }; kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN); + kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN); kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); kd.estimatedTime = FORCE_PRIORITY_8; |