Diffstat (limited to 'inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels')
108 files changed, 3167 insertions, 144 deletions
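Note: the most frequent change across these files is mechanical — every call site that builds an OpenCL kernel (FillCLKernelData, and GetKernelString in the concatenation base) now also receives params.engineInfo, so the generated kernel string can take the target device into account. The snippet below is only a self-contained illustrative sketch of that call-site migration; the Stub* types and the trimmed parameter list are assumptions made for the example and are not the real clDNN declarations.

    // Illustrative sketch only: Stub* types are hypothetical stand-ins for the
    // clDNN kernel_selector classes, and the parameter list is shortened to the
    // arguments visible at the call sites in this diff.
    #include <cstddef>
    #include <string>

    struct StubEngineInfo  { std::string deviceId; };                      // stands in for params.engineInfo
    struct StubDispatchData { size_t gws0 = 1, gws1 = 1, gws2 = 1,
                                     lws0 = 1, lws1 = 1, lws2 = 1; };      // stands in for runInfo
    struct StubKernel       { std::string entryPoint; };                   // stands in for kd.kernels[0]

    // Old call-site shape (before this diff):
    //   FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point);
    // New call-site shape (after this diff):
    //   FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
    void FillCLKernelData(StubKernel& kernel, const StubDispatchData& runInfo,
                          const StubEngineInfo& engineInfo, const std::string& kernelName,
                          const std::string& jit, const std::string& entryPoint)
    {
        // The real helper fills in work-group sizes, the kernel string and the
        // argument list; presumably it now also uses engineInfo to specialize the
        // generated OpenCL for the device.
        (void)runInfo; (void)engineInfo; (void)kernelName; (void)jit;
        kernel.entryPoint = entryPoint;
    }

    int main()
    {
        StubKernel kernel;
        StubDispatchData runInfo;
        StubEngineInfo engineInfo{"illustrative-device"};
        FillCLKernelData(kernel, runInfo, engineInfo, "border_gpu_ref", "", "border_gpu_ref");
        return 0;
    }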
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp index 7feeb60d6..358b66d1c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp @@ -96,7 +96,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); if (newParams.gradient) kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp index a69ce07c6..894101bff 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp @@ -75,7 +75,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = FORCE_PRIORITY_9; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp index 455b249cc..a51824c52 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_base.cpp @@ -78,7 +78,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp index 70af969b3..8e086dae6 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.cpp @@ -78,7 +78,7 @@ namespace kernel_selector runInfo.lws1 = 1; runInfo.lws2 = 1; - FillCLKernelData(kernel, runInfo, kernelName, jit, entryPoint); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint); size = (size / 128 + 1) * topK; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp index 
46e8e85d8..c94e624bd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_base.cpp @@ -95,7 +95,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp index 811d4412f..ebf881f50 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp @@ -80,7 +80,7 @@ namespace kernel_selector auto& kernel = kd.kernels[0]; int inputs_num = 1 + orgParams.batchNormParams.with_inv_var; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, inputs_num); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, inputs_num); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp index c5b15712a..25d9115dd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_base.cpp @@ -70,7 +70,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 3); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 3); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp new file mode 100644 index 000000000..3346c4b0a --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.cpp @@ -0,0 +1,78 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "border_kernel_base.h" + +#include "kernel_selector_utils.h" + + +namespace kernel_selector +{ + JitConstants BorderKernelBase::GetJitConstants(const border_params& params) const + { + JitConstants jit = MakeBaseParamsJitConstants(params); + + jit.AddConstants({ + MakeJitConstant("LT_SIZES", params.lt_sizes), + MakeJitConstant("RB_SIZES", params.rb_sizes), + MakeJitConstant(toString(params.b_type), "") + }); + + return jit; + } + + BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params& params) const + { + const auto& output = params.output; + + DispatchData kd; + + kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + + std::vector<size_t> global{output.X().v, output.Y().v, output.Batch().v * output.Feature().v}; + const auto& local = GetOptimalLocalWorkGroupSizes(global); + + kd.gws0 = global[0]; + kd.gws1 = global[1]; + kd.gws2 = global[2]; + + kd.lws0 = local[0]; + kd.lws1 = local[1]; + kd.lws2 = local[2]; + + return kd; + } + + KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const + { + assert(params.GetType() == KernelType::BORDER); + + const auto& prim_params = static_cast<const border_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast) + + auto run_info = SetDefault(prim_params); + KernelData k_data = KernelData::Default<border_params>(params); + + auto cldnn_jit = GetJitConstants(prim_params); + auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + auto& kernel = k_data.kernels[0]; + FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point); + + k_data.estimatedTime = estimated_time; + + return {k_data}; + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h new file mode 100644 index 000000000..43c10c715 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h @@ -0,0 +1,72 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "common_kernel_base.h" +#include "kernel_selector_params.h" + + +namespace kernel_selector +{ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // border_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct border_params : public base_params + { + DimTensor<> lt_sizes; + DimTensor<> rb_sizes; + BorderType b_type = BorderType::ZERO; + + + border_params() + : base_params(KernelType::BORDER) + { + } + + ParamsKey GetParamsKey() const override + { + ParamsKey k = base_params::GetParamsKey(); + // k.EnableBorderType(b_type); + return k; + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // border_optional_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct border_optional_params : optional_params + { + border_optional_params() + : optional_params(KernelType::BORDER) + { + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // BorderKernelBase + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class BorderKernelBase : public common_kernel_base + { + public: + using common_kernel_base::common_kernel_base; + + using DispatchData = CommonDispatchData; + + protected: + JitConstants GetJitConstants(const border_params& params) const; + DispatchData SetDefault(const border_params& params) const; + KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp new file mode 100644 index 000000000..9029d7afc --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp @@ -0,0 +1,51 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "border_kernel_ref.h" + + +namespace kernel_selector +{ + ParamsKey BorderKernelRef::GetSupportedKey() const + { + ParamsKey k; + + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::UINT8); + + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); + + k.EnableInputLayout(DataLayout::bfyx); + k.EnableInputLayout(DataLayout::yxfb); + k.EnableInputLayout(DataLayout::byxf); + + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::yxfb); + k.EnableOutputLayout(DataLayout::byxf); + + k.EnableBatching(); + + return k; + } + + KernelsData BorderKernelRef::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options, FORCE_PRIORITY_9); + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h new file mode 100644 index 000000000..0862ed144 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h @@ -0,0 +1,30 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "border_kernel_base.h" + + +namespace kernel_selector +{ + class BorderKernelRef : public BorderKernelBase + { + public: + BorderKernelRef() : BorderKernelBase("border_gpu_ref") {} + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp new file mode 100644 index 000000000..42e352c6a --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.cpp @@ -0,0 +1,30 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "border_kernel_selector.h" +#include "border_kernel_ref.h" + +namespace kernel_selector +{ + border_kernel_selector::border_kernel_selector() + { + Attach<BorderKernelRef>(); + } + + KernelsData border_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const + { + return GetNaiveBestKernel(params, options, KernelType::BORDER); + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h new file mode 100644 index 000000000..515a9a497 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_selector.h @@ -0,0 +1,34 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "kernel_selector.h" + + +namespace kernel_selector +{ + class border_kernel_selector : public kernel_selector_base + { + public: + static border_kernel_selector &Instance() { + static border_kernel_selector instance; + return instance; + } + + border_kernel_selector(); + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp new file mode 100644 index 000000000..3d3b2f4d3 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp @@ -0,0 +1,71 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "broadcast_kernel_base.h" + +#include "kernel_selector_utils.h" + + +namespace kernel_selector +{ + JitConstants BroadcastKernelBase::GetJitConstants(const broadcast_params& params) + { + JitConstants jit = MakeBaseParamsJitConstants(params); + return jit; + } + + BroadcastKernelBase::DispatchData BroadcastKernelBase::SetDefault(const broadcast_params& params) + { + const auto& output = params.output; + + DispatchData kd; + + kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + + std::vector<size_t> global{output.X().v, output.Y().v, output.Batch().v * output.Feature().v}; + const auto& local = GetOptimalLocalWorkGroupSizes(global); + + kd.gws0 = global[0]; + kd.gws1 = global[1]; + kd.gws2 = global[2]; + + kd.lws0 = local[0]; + kd.lws1 = local[1]; + kd.lws2 = local[2]; + + return kd; + } + + KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const + { + assert(params.GetType() == KernelType::BROADCAST); + + const auto& prim_params = static_cast<const broadcast_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast) + + auto run_info = SetDefault(prim_params); + KernelData k_data = KernelData::Default<broadcast_params>(params); + + auto cldnn_jit = GetJitConstants(prim_params); + auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + auto& kernel = k_data.kernels[0]; + FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point); + + k_data.estimatedTime = estimated_time; + + return {k_data}; + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h new file mode 100644 index 000000000..cf4865e80 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h @@ -0,0 +1,60 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "common_kernel_base.h" +#include "kernel_selector_params.h" + + +namespace kernel_selector +{ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // broadcast_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct broadcast_params : public base_params + { + broadcast_params() + : base_params(KernelType::BROADCAST) + { + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // broadcast_optional_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct broadcast_optional_params : optional_params + { + broadcast_optional_params() + : optional_params(KernelType::BROADCAST) + { + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // BroadcastKernelBase + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class BroadcastKernelBase : public common_kernel_base + { + public: + using common_kernel_base::common_kernel_base; + + using DispatchData = CommonDispatchData; + + protected: + static JitConstants GetJitConstants(const broadcast_params& params); + static DispatchData SetDefault(const broadcast_params& params); + KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp new file mode 100644 index 000000000..0be42a5e2 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp @@ -0,0 +1,51 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "broadcast_kernel_ref.h" + + +namespace kernel_selector +{ + ParamsKey BroadcastKernelRef::GetSupportedKey() const + { + ParamsKey k; + + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::UINT8); + + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); + + k.EnableInputLayout(DataLayout::bfyx); + k.EnableInputLayout(DataLayout::yxfb); + k.EnableInputLayout(DataLayout::byxf); + + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::yxfb); + k.EnableOutputLayout(DataLayout::byxf); + + k.EnableBatching(); + + return k; + } + + KernelsData BroadcastKernelRef::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options, FORCE_PRIORITY_9); + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h new file mode 100644 index 000000000..ccca397ab --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h @@ -0,0 +1,30 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "broadcast_kernel_base.h" + + +namespace kernel_selector +{ + class BroadcastKernelRef : public BroadcastKernelBase + { + public: + BroadcastKernelRef() : BroadcastKernelBase("broadcast_gpu_ref") {} + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp new file mode 100644 index 000000000..02ae904d2 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.cpp @@ -0,0 +1,30 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "broadcast_kernel_selector.h" +#include "broadcast_kernel_ref.h" + +namespace kernel_selector +{ + broadcast_kernel_selector::broadcast_kernel_selector() + { + Attach<BroadcastKernelRef>(); + } + + KernelsData broadcast_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const + { + return GetNaiveBestKernel(params, options, KernelType::BROADCAST); + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h new file mode 100644 index 000000000..ec7f4da55 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_selector.h @@ -0,0 +1,34 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "kernel_selector.h" + + +namespace kernel_selector +{ + class broadcast_kernel_selector : public kernel_selector_base + { + public: + static broadcast_kernel_selector &Instance() { + static broadcast_kernel_selector instance; + return instance; + } + + broadcast_kernel_selector(); + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp index b81cfe879..f9df941ad 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp @@ -71,9 +71,18 @@ namespace kernel_selector // Determine global work sizes. if (params.inputs[0].GetLayout() != params.output.GetLayout()) { - kd.gws0 = dims.size() < 2 ? 1 : dims[2].v; - kd.gws1 = dims.size() < 3 ? 1 : dims[1].v; - kd.gws2 = dims.size() < 4 ? 1 : dims[0].v; + if (params.inputs[0].GetLayout() == kernel_selector::Tensor::DataLayout::yxfb) + { + kd.gws0 = dims.size() < 2 ? 1 : dims[3].v; + kd.gws1 = dims.size() < 3 ? 1 : dims[1].v; + kd.gws2 = dims.size() < 4 ? 1 : dims[0].v; + } + else + { + kd.gws0 = dims.size() < 2 ? 1 : dims[2].v; + kd.gws1 = dims.size() < 3 ? 1 : dims[1].v; + kd.gws2 = dims.size() < 4 ? 
1 : dims[0].v; + } } else { @@ -124,7 +133,7 @@ namespace kernel_selector kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 }; kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 }; - kernel.kernelString = GetKernelString(kernelName, jit, entryPoint); + kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, (uint32_t)i }); kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 }); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp index 5520f427a..c5c6ae53a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp @@ -26,9 +26,13 @@ namespace kernel_selector k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::INT32); + k.EnableInputDataType(Datatype::INT64); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT64); k.EnableAllInputLayout(); k.EnableAllOutputLayout(); k.EnableTensorOffset(); @@ -60,10 +64,20 @@ namespace kernel_selector //case for input == bfyx, output == yxfb and input == yxfb, output == bfyx if (input_format != output_format) { - dim_index[0] = 3; - dim_index[1] = 2; - dim_index[2] = 0; - dim_index[3] = 1; + if (input_format == kernel_selector::Tensor::DataLayout::yxfb) + { + dim_index[0] = 2; + dim_index[1] = 3; + dim_index[2] = 1; + dim_index[3] = 0; + } + else + { + dim_index[0] = 3; + dim_index[1] = 2; + dim_index[2] = 0; + dim_index[3] = 1; + } } cldnnJit.AddConstant(MakeJitConstant("INPUT_DIM_0", dim_index[0])); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp index 878a02048..86bfe937c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp @@ -241,7 +241,7 @@ namespace kernel_selector auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, finalKernelName, jit, entryPoint, exeMode, true, !newParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration); + FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entryPoint, exeMode, true, !newParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration); kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 }); kd.estimatedTime = runInfo.effiency; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp index ad56556bc..b92df30b7 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2017 Intel Corporation +// Copyright (c) 2017-2018 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ namespace kernel_selector ConvolutionKernel_bfyx_3x3_dw_opt::ConvolutionKernel_bfyx_3x3_dw_opt() : ConvolutionKernelBase("convolution_gpu_bfyx_3x3_dw_opt") { // Generate the dispatch options to the auto-tuner. - std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14,16 }; + std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14 }; std::vector<size_t> tileYDimSizes = { 1,2,3,4,5,6,7 }; std::vector<std::string> executionModes = { /*AGE_BASED ,*/ ROUND_ROBIN }; @@ -141,6 +141,18 @@ namespace kernel_selector KernelsData ConvolutionKernel_bfyx_3x3_dw_opt::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const { + constexpr int simdSize = 16; + + KernelData kd = KernelData::Default<convolution_params>(params); + convolution_params& convParams = *static_cast<convolution_params*>(kd.params.get()); + DispatchData runInfo = SetDefault(convParams, autoTuneIndex); + + if (static_cast<int>(static_cast<int>(runInfo.gws0 - 1) / simdSize) * runInfo.cldnnStyle.blockWidth + simdSize > convParams.inputs[0].Y().pitch) + { + // Internal Error - requested tile size is not supported for y pitch + return{}; + } + return GetCommonKernelsData(params, options, GetAutoTuneOptions(params, autoTuneIndex).exeMode, autoTuneIndex); } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp new file mode 100644 index 000000000..f6841db94 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp @@ -0,0 +1,100 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "convolution_kernel_bfyx_depthwise_weights_lwg.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector +{ + ParamsKey ConvolutionKernel_bfyx_depthwise_weights_lwg::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::F16); + k.EnableInputWeightsType(WeightsType::F16); + k.EnableInputWeightsType(WeightsType::F32); + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBiasPerFeature(); + k.EnableNonBiasTerm(); + k.EnableBatching(); + k.EnableSplitSupport(); + k.EnableSubGroup(); + k.EnableSubGroupShort(); + k.EnableDepthwiseSeparableOpt(); + return k; + } + + bool ConvolutionKernel_bfyx_depthwise_weights_lwg::Validate(const Params& p, const optional_params& o) const + { + if (!ConvolutionKernelBase::Validate(p, o) || + !CovolutionCheckInput(p, o)) + { + return false; + } + + const convolution_params& cp = static_cast<const convolution_params&>(p); + if (!cp.depthwiseSeparableOpt) + return false; + + if ((cp.filterSize.x > 4) || + (cp.filterSize.y > 4) || + (cp.inputs[0].Feature().v != cp.split)) + { + return false; + } + + return true; + } + + ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_depthwise_weights_lwg::SetDefault(const convolution_params& params, int) const + { + DispatchData runInfo = Parent::SetDefault(params); + const auto& out = params.output; + + std::vector<size_t> global = { out.X().v * out.Y().v, out.Feature().v, out.Batch().v }; + + runInfo.gws0 = Align(global[0], 16); + runInfo.gws1 = global[1]; + runInfo.gws2 = global[2]; + runInfo.lws0 = 16; + runInfo.lws1 = 1; + runInfo.lws2 = 1; + + runInfo.effiency = FORCE_PRIORITY_6; + + return runInfo; + } + + JitConstants ConvolutionKernel_bfyx_depthwise_weights_lwg::GetJitConstants(const convolution_params& params, const DispatchData& kd) const + { + auto mem_consts = ConvolutionKernelBase::GetJitConstants(params, kd); + + if(params.padding.x != 0 || params.padding.y != 0) + mem_consts.AddConstant(MakeJitConstant("BOUNDARY_CHECK", 1)); + + return mem_consts; + } + + KernelsData ConvolutionKernel_bfyx_depthwise_weights_lwg::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options); + } +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h new file mode 100644 index 000000000..b578f8fd5 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h @@ -0,0 +1,39 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "convolution_kernel_base.h" + +namespace kernel_selector +{ + class ConvolutionKernel_bfyx_depthwise_weights_lwg : public ConvolutionKernelBase + { + public: + using Parent = ConvolutionKernelBase; + ConvolutionKernel_bfyx_depthwise_weights_lwg() : ConvolutionKernelBase("convolution_gpu_bfyx_depthwise_weights_lwg") {} + virtual ~ConvolutionKernel_bfyx_depthwise_weights_lwg() {} + + virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + virtual ParamsKey GetSupportedKey() const override; + + protected: + bool Validate(const Params&, const optional_params&) const override; + std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::oiyx }; } + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; + }; +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp new file mode 100644 index 000000000..ce73392ac --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp @@ -0,0 +1,97 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "convolution_kernel_mmad_batched.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { + + ParamsKey ConvolutionKernel_mmad_batched::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT8); + k.EnableInputWeightsType(WeightsType::INT8); + k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableDilation(); + k.EnableBiasPerFeature(); + k.EnableNonBiasTerm(); + k.EnableBatching(); + k.EnableSplitSupport(); + k.EnableInt8Quantization(); + k.EnableOutputCalibration(); + k.DisableTuning(); + return k; + } + + ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_batched::SetDefault(const convolution_params& arg, int) const + { + DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg); + + constexpr size_t sub_group_size = 8; + + const auto of_maps = arg.output.Feature().v; + const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size); + + runInfo.effiency = FORCE_PRIORITY_3; + + runInfo.gws0 = arg.output.X().v; + runInfo.gws1 = arg.output.Y().v; + runInfo.gws2 = of_threads_per_batch * ((arg.output.Batch().v+3) / 4); + + runInfo.lws0 = 1; + runInfo.lws1 = 1; + runInfo.lws2 = sub_group_size; + + return runInfo; + } + + JitConstants ConvolutionKernel_mmad_batched::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const + { + auto jit = Parent::GetJitConstants(params, runInfo); + + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2)); + + // pitch for special block format used in this kernel + const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32); + const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8; + jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch)); + + const size_t in_x_pitch = 32 * 4; + const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded(); + const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded(); + const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4); + const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before; + + jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch)); + 
jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch)); + jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset)); + return jit; + } + + KernelsData ConvolutionKernel_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const + { + KernelsData kd = GetCommonKernelsData(params, options); + if(!kd.empty()) + kd[0].estimatedTime = FORCE_PRIORITY_3; + return kd; + } +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h new file mode 100644 index 000000000..8a3dda451 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h @@ -0,0 +1,43 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "convolution_kernel_base.h" + +namespace kernel_selector { + + class ConvolutionKernel_mmad_batched : public ConvolutionKernelBase + { + public: + using Parent = ConvolutionKernelBase; + ConvolutionKernel_mmad_batched() : ConvolutionKernelBase("convolution_gpu_mmad_batched") {} + virtual ~ConvolutionKernel_mmad_batched() {} + + virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + virtual ParamsKey GetSupportedKey() const override; + + protected: + JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; + DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override; + virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override + { + return{ + WeightsLayout::os_is_yx_isa8_osv8_isv4, + }; + } + }; +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp index 21d9c92ff..aa5850593 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2018 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,6 +35,8 @@ #include "convolution_kernel_MMAD_blocks.h" #include "convolution_kernel_1x1_gemm_MMAD.h" #include "convolution_kernel_byxf_af32_depthwise.h" +#include "convolution_kernel_mmad_batched.h" +#include "convolution_kernel_bfyx_depthwise_weights_lwg.h" #include <iostream> @@ -61,6 +63,8 @@ namespace kernel_selector Attach<ConvolutionKernel_MMAD_blocks>(); Attach<ConvolutionKernel_1x1_gemm_MMAD>(); Attach<ConvolutionKernel_byxf_af32_depthiwise>(); + Attach<ConvolutionKernel_mmad_batched>(); + Attach<ConvolutionKernel_bfyx_depthwise_weights_lwg>(); //Attach<ConvolutionKernel_Tutorial>(); //In order to use this implementation for tutorial purposes please uncomment this line } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h index 9275a5fcb..6b4c756d0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.h @@ -17,7 +17,6 @@ #pragma once #include "kernel_selector.h" -#include "kernel_runner_interface.h" namespace kernel_selector { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp index b6065edc6..13f8ba40b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.cpp @@ -22,11 +22,8 @@ namespace kernel_selector ParamsKey ConvolutionGradWeightsKernel1x1::GetSupportedKey() const { ParamsKey k; - k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); - k.EnableInputWeightsType(WeightsType::F16); k.EnableInputWeightsType(WeightsType::F32); - k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); k.EnableInputLayout(DataLayout::bfyx); k.EnableOutputLayout(DataLayout::yxfb); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp new file mode 100644 index 000000000..c892f6d2d --- /dev/null +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.cpp @@ -0,0 +1,75 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "convolution_grad_weights_kernel_3x3.h" + +namespace kernel_selector +{ + + ParamsKey ConvolutionGradWeightsKernel3x3::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::F32); + k.EnableInputWeightsType(WeightsType::F32); + k.EnableOutputDataType(Datatype::F32); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::yxfb); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::byxf); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBiasPerFeature(); + k.EnableNonBiasTerm(); + k.EnableMomentum(); + k.EnableBatching(); + k.EnableSplitSupport(); + k.EnableGradient(); + k.DisableTuning(); + return k; + } + + bool ConvolutionGradWeightsKernel3x3::Validate(const Params& p, const optional_params&) const + { + const auto& params = static_cast<const convolution_grad_weights_params&>(p); + + if (params.stride.x != 1 || params.stride.y != 1) + return false; + if (params.filterSize.x != 3 || params.filterSize.y != 3) + return false; + return true; + } + + ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel3x3::SetDefault(const convolution_grad_weights_params& params) const + { + auto input_features = params.weights.IFM().v; + auto output_features = params.weights.OFM().v; + + DispatchData kd; + + kd.gws0 = Align(output_features, 16); + kd.gws1 = input_features; + kd.gws2 = 1; + kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32)); + while (kd.gws0 % kd.lws0 != 0) + { + kd.lws0 -= 16; + } + kd.lws1 = 1; + kd.lws2 = 1; + kd.effiency = FORCE_PRIORITY_8; + return kd; + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h new file mode 100644 index 000000000..39fcb7e96 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h @@ -0,0 +1,33 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#pragma once + +#include "convolution_grad_weights_kernel_base.h" + +namespace kernel_selector { + + class ConvolutionGradWeightsKernel3x3 : public ConvolutionGradWeightsKernelBase + { + public: + ConvolutionGradWeightsKernel3x3() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_3x3") {} + virtual ~ConvolutionGradWeightsKernel3x3() {} + + virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override; + virtual bool Validate(const Params& p, const optional_params& o) const override; + virtual ParamsKey GetSupportedKey() const override; + }; +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp new file mode 100644 index 000000000..8bd5000d6 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.cpp @@ -0,0 +1,73 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "convolution_grad_weights_kernel_7x7.h" + +namespace kernel_selector +{ + + ParamsKey ConvolutionGradWeightsKernel7x7::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::F32); + k.EnableInputWeightsType(WeightsType::F32); + k.EnableOutputDataType(Datatype::F32); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::yxfb); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::byxf); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBiasPerFeature(); + k.EnableNonBiasTerm(); + k.EnableMomentum(); + k.EnableBatching(); + k.EnableSplitSupport(); + k.EnableGradient(); + k.DisableTuning(); + return k; + } + + bool ConvolutionGradWeightsKernel7x7::Validate(const Params& p, const optional_params&) const + { + const auto& params = static_cast<const convolution_grad_weights_params&>(p); + + if (params.filterSize.x != 7 || params.filterSize.y != 7) + return false; + return true; + } + + ConvolutionGradWeightsKernelBase::DispatchData ConvolutionGradWeightsKernel7x7::SetDefault(const convolution_grad_weights_params& params) const + { + auto input_features = params.weights.IFM().v; + auto output_features = params.weights.OFM().v; + + DispatchData kd; + + kd.gws0 = 8; + kd.gws1 = Align(output_features, 16); + kd.gws2 = input_features; + kd.lws0 = 1; + kd.lws1 = std::min(std::max(kd.gws1, static_cast<size_t>(1)), static_cast<size_t>(32)); + while (kd.gws1 % kd.lws1 != 0) + { + kd.lws1 -= 16; + } + kd.lws2 = 1; + kd.effiency = FORCE_PRIORITY_8; + return kd; + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h new file mode 100644 index 000000000..286caf5c0 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h @@ -0,0 +1,33 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "convolution_grad_weights_kernel_base.h" + +namespace kernel_selector { + + class ConvolutionGradWeightsKernel7x7 : public ConvolutionGradWeightsKernelBase + { + public: + ConvolutionGradWeightsKernel7x7() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_7x7") {} + virtual ~ConvolutionGradWeightsKernel7x7() {} + + virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override; + virtual bool Validate(const Params& p, const optional_params& o) const override; + virtual ParamsKey GetSupportedKey() const override; + }; +}
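Note: the work-group sizing in ConvolutionGradWeightsKernel7x7::SetDefault above relies on gws1 being aligned up to 16: lws1 starts at min(gws1, 32) and is shrunk in steps of 16 until it divides gws1, so the loop always terminates at 32 or 16. A minimal standalone sketch of that sizing (AlignTo and PickLws1 are illustrative names, not part of this patch):

    #include <algorithm>
    #include <cstddef>

    // Align `value` up to the next multiple of `multiple` (mirrors Align()).
    static size_t AlignTo(size_t value, size_t multiple)
    {
        return ((value + multiple - 1) / multiple) * multiple;
    }

    // Pick the local size along gws1 the way the 7x7 kernel does:
    // start at min(gws1, 32) and step down by 16 until it divides gws1.
    static size_t PickLws1(size_t output_features)
    {
        const size_t gws1 = AlignTo(output_features, 16);
        size_t lws1 = std::min(std::max(gws1, static_cast<size_t>(1)),
                               static_cast<size_t>(32));
        while (gws1 % lws1 != 0)
            lws1 -= 16;
        return lws1; // e.g. 64 output features -> 32, 48 -> 16
    }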
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp index 2998075b7..1e2cd30a8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp @@ -124,7 +124,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty()); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty()); if (newParams.use_momentum) { kernel.arguments.push_back({ ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0 }); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp index 7ca7c9a39..3c29a7616 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.cpp @@ -24,7 +24,6 @@ namespace kernel_selector ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); - k.EnableInputWeightsType(WeightsType::F16); k.EnableInputWeightsType(WeightsType::F32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp index 16b842e9e..fb045a273 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_selector.cpp @@ -18,6 +18,8 @@ #include "convolution_grad_weights_kernel_ref.h" #include "convolution_grad_weights_kernel_1x1.h" #include "convolution_grad_weights_kernel_yxfb.h" +#include "convolution_grad_weights_kernel_3x3.h" +#include "convolution_grad_weights_kernel_7x7.h" namespace kernel_selector { @@ -26,6 +28,8 @@ namespace kernel_selector Attach<ConvolutionGradWeightsKernelRef>(); Attach<ConvolutionGradWeightsKernel1x1>(); Attach<ConvolutionGradWeightsKernel_yxfb>(); + Attach<ConvolutionGradWeightsKernel3x3>(); + Attach<ConvolutionGradWeightsKernel7x7>(); } KernelsData convolution_grad_weights_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp index e6fb6d026..f53d51b39 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.cpp @@ -24,7 +24,6 @@ namespace kernel_selector ParamsKey k; k.EnableInputDataType(Datatype::F32); k.EnableInputWeightsType(WeightsType::F32); - k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); k.EnableInputLayout(DataLayout::yxfb); k.EnableOutputLayout(DataLayout::yxfb); @@ -64,11 +63,11 @@ namespace kernel_selector DispatchData kd; - kd.gws0 = 32; + kd.gws0 = 16; kd.gws1 = input_features * output_features; kd.gws2 = x * y; - kd.lws0 = 32; + kd.lws0 = 16; kd.lws1 = 1; kd.lws2 = 1; kd.effiency = FORCE_PRIORITY_7; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp index 5369d7ac0..cbc0bd780 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp @@ -57,6 +57,7 @@ namespace kernel_selector MakeJitConstant("FILTER_ARRAY_NUM", dp.split), MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding), MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", dp.depthwiseSeparableOpt), + MakeJitConstant("FUSED_ELTWISE", dp.fused_eltwise) }); return jit; @@ -114,13 +115,15 @@ namespace kernel_selector return{}; } - auto cldnn_jit = GetJitConstants(orgParams); - auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options); + auto cldnn_jit = GetJitConstants(newParams); + auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty()); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty()); kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 }); + if (orgParams.fused_eltwise) + kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); kd.estimatedTime = runInfo.effiency; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h index ac5baec0f..206614a70 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h @@ -34,6 +34,7 @@ namespace kernel_selector uSize padding; uint32_t split = 1; bool depthwiseSeparableOpt = false; + bool fused_eltwise = false; virtual std::string to_string() const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp index 613fbb4f8..5feac0ca5 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp @@ -41,6 +41,22 @@ namespace kernel_selector } } + ParamsKey eltwise_params::GetParamsKey() const + { + ParamsKey k = base_params::GetParamsKey(); + if (int8_quantization) + { + k.EnableInt8Quantization(); + } + + if (output_calibration) + { + k.EnableOutputCalibration(); + } + + return k; + } + bool EltwiseKernelBase::Validate(const Params& p, const optional_params& o) const { if (p.GetType() != KernelType::ELTWISE || @@ -56,7 +72,7 @@ namespace kernel_selector return false; } - auto& operations = params.eltwiseParams.operations; + auto& operations = params.operations; if (operations.size() == 0) { @@ -91,24 +107,24 @@ namespace kernel_selector JitConstants jit = MakeBaseParamsJitConstants(params); jit.AddConstants({ - MakeJitConstant("ELTWISE_LAYOUT_BASED", params.eltwiseParams.layoutBased), - MakeJitConstant("QUANTIZATION_TERM", params.eltwiseParams.int8_quantization), + MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased), + MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization), }); - if (params.eltwiseParams.int8_quantization) + if (params.int8_quantization) { - if (params.eltwiseParams.output_calibration) + if (params.output_calibration) { - jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.eltwiseParams.output_calibration)); + jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration)); jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0])); } else - jit.AddConstants({ MakeJitConstant("O_QF", params.eltwiseParams.output_quantization_factor) }); + jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) }); } std::string inputs_decls, vload_decls; - auto& updateInputs = params.eltwiseParams.updateInputIds; + auto& updateInputs = params.updateInputIds; for (size_t i = 0; i < params.inputs.size(); i++) { @@ -143,8 +159,8 @@ namespace kernel_selector std::string do_eltwise; - auto& operations = params.eltwiseParams.operations; - auto& coefficients = params.eltwiseParams.coefficients; + auto& operations = params.operations; + auto& coefficients = params.coefficients; for (size_t op_num = 0; op_num < operations.size(); op_num++) { @@ -187,7 +203,7 @@ namespace kernel_selector cast_type = "(MAKE_VECTOR_TYPE(UNIT_TYPE, 8))"; op = "const MAKE_VECTOR_TYPE(UNIT_TYPE, 8) tmp" + op_num_str + " = "; } - else if(params.eltwiseParams.int8_quantization) + else if(params.int8_quantization) { cast_type = "(int)"; op = "const int tmp" + op_num_str + " = "; @@ -251,7 +267,7 @@ namespace kernel_selector jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise)); - if (params.eltwiseParams.layoutBased || params.eltwiseParams.int8_quantization) + if (params.layoutBased || params.int8_quantization) { jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0])); } @@ -264,32 +280,27 @@ namespace kernel_selector return GetJitConstantsCommon(params, false); } - KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const + EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_params& params) const { - if (!Validate(params, options)) - { - return{}; - } - - KernelData kd = 
KernelData::Default<eltwise_params>(params); - eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get()); + DispatchData kd; - auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); - auto cldnn_jit = GetJitConstants(newParams); - std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); - - const auto& out = newParams.output; - auto& kernel = kd.kernels[0]; - if (newParams.eltwiseParams.layoutBased || newParams.eltwiseParams.int8_quantization) + if (params.layoutBased || params.int8_quantization) { - kernel.workGroups.global = GetTensorFriendlyWorkGroups(newParams.inputs[0]); + auto global = GetTensorFriendlyWorkGroups(params.inputs[0]); + kd.gws0 = global[0]; + kd.gws1 = global[1]; + kd.gws2 = global[2]; } - else if (CheckInputsOutputNoPitchSameDims(newParams)) + else if (CheckInputsOutputNoPitchSameDims(params)) { - kernel.workGroups.global = { newParams.inputs[0].LogicalSize(), 1, 1 }; + kd.gws0 = params.inputs[0].LogicalSize(); + kd.gws1 = 1; + kd.gws2 = 1; } else { + const auto& out = params.output; + std::vector<size_t> gws; for (const auto& o : out.GetDims()) { @@ -301,11 +312,42 @@ namespace kernel_selector gws.push_back(1U); } - kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] }; + kd.gws0 = gws[0]; + kd.gws1 = gws[1]; + kd.gws2 = gws[2] * gws[3]; } - kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.eltwiseParams.int8_quantization, newParams.eltwiseParams.output_calibration); + + auto local = GetOptimalLocalWorkGroupSizes( { kd.gws0, kd.gws1, kd.gws2 } ); + kd.lws0 = local[0]; + kd.lws1 = local[1]; + kd.lws2 = local[2]; + + return kd; + } + + KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const + { + if (!Validate(params, options)) + { + return{}; + } + + KernelData kd = KernelData::Default<eltwise_params>(params); + eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get()); + + auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); + auto cldnn_jit = GetJitConstants(newParams); + std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); + + DispatchData runInfo = SetDefault(newParams); + + auto& kernel = kd.kernels[0]; + + kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 }; + kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 }; + + kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN); + kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.int8_quantization, newParams.output_calibration); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h index 9d7127822..161140849 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h @@ -25,7 +25,7 @@ namespace kernel_selector //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct eltwise_params : public base_params { 
- eltwise_params() : base_params(KernelType::ELTWISE), eltwiseParams() {} + eltwise_params() : base_params(KernelType::ELTWISE) {} struct InputType { @@ -87,35 +87,17 @@ namespace kernel_selector uint32_t tmpId; }; - struct DedicatedParams - { - std::vector<eltwise_params::Node> operations; - std::vector<float> coefficients; - std::vector<UpdateInputData> updateInputIds; - bool layoutBased = false; - bool int8_quantization = false; - bool output_calibration = false; - float output_quantization_factor = 1.0f; - }; - - DedicatedParams eltwiseParams; + std::vector<eltwise_params::Node> operations; + std::vector<float> coefficients; + std::vector<UpdateInputData> updateInputIds; + + bool layoutBased = false; + bool int8_quantization = false; + bool output_calibration = false; + float output_quantization_factor = 1.0f; + MultiDataTensor output_calibration_factors; - - virtual ParamsKey GetParamsKey() const - { - ParamsKey k = base_params::GetParamsKey(); - if (eltwiseParams.int8_quantization) - { - k.EnableInt8Quantization(); - } - - if (eltwiseParams.output_calibration) - { - k.EnableOutputCalibration(); - } - - return k; - } + virtual ParamsKey GetParamsKey() const; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -141,6 +123,7 @@ namespace kernel_selector protected: virtual bool Validate(const Params& p, const optional_params& o) const override; virtual JitConstants GetJitConstants(const eltwise_params& params) const; + virtual DispatchData SetDefault(const eltwise_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const; }; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp new file mode 100644 index 000000000..571a013ce --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp @@ -0,0 +1,222 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { + + ParamsKey EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT8); + k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableInt8Quantization(); + k.EnableOutputCalibration(); + return k; + } + + EltwiseKernelBase::DispatchData EltwiseKernel_fs_bs_yx_bsv4_fsv32::SetDefault(const eltwise_params& params) const + { + DispatchData kd; + + kd.gws0 = params.output.X().v; + kd.gws1 = params.output.Y().v; + // we process 4 batches and 4 features per workitem + kd.gws2 = (params.output.Batch().v / 4) * (params.output.Feature().v / 4); + kd.lws0 = 1; + kd.lws1 = 1; + kd.lws2 = 8; + + return kd; + } + + JitConstants EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetJitConstants(const eltwise_params& params) const + { + JitConstants jit = MakeBaseParamsJitConstants(params); + + const size_t in_x_pitch = 32 * 4; + const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded(); + const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded(); + const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4); + const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before; + + jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch)); + jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch)); + jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset)); + + /////////////// + jit.AddConstants({ + MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased), + MakeJitConstant("QUANTIZATION_TERM", params.int8_quantization), + }); + + if (params.int8_quantization) + { + if (params.output_calibration) + { + jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration)); + jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0])); + + } + else + jit.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) }); + } + + std::string inputs_decls; + auto& updateInputs = params.updateInputIds; + + for (size_t i = 0; i < params.inputs.size(); i++) + { + //const should be added only to inputs which will not be updated + std::string const_str = "const"; + for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) + { + if (updateInputs[update_input_idx].inputId == i) + { + const_str = ""; + break; + } + } + + inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", "; + } + + jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls)); + jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params))); + + std::string do_eltwise; + + auto& operations = params.operations; + auto& coefficients = params.coefficients; + + for (size_t op_num = 0; op_num < operations.size(); op_num++) + { + const std::string op_num_str = std::to_string(op_num); + const auto& ew = operations[op_num]; + + for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) + { + const auto& input = ew.inputs[input_idx]; + const std::string 
name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx); + switch (input.mode) + { + case EltwiseInputMode::SCALAR: + jit.AddConstant(MakeJitConstant(name, input.scalar)); + break; + case EltwiseInputMode::INPUT_BUFFER: + jit.AddConstant(MakeJitConstant(name, "GET_INPUT(input" + std::to_string(input.index) + ", INPUT" + std::to_string(input.index) + ")")); + break; + case EltwiseInputMode::OUTPUT_BUFFER: + jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]")); + break; + case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER: + jit.AddConstant(MakeJitConstant(name, "input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]")); + break; + case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX: + jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex))); + break; + default: + break; + } + } + std::string input0_str, input1_str, cast_type, op; + + if (params.int8_quantization) + { + cast_type = "(int16)"; + op = "const int16 tmp" + op_num_str + " = "; + } + else + { + cast_type = "(UNIT_TYPE)"; + op = "const UNIT_TYPE tmp" + op_num_str + " = "; + } + + input0_str = cast_type + "INPUT_" + op_num_str + "_0"; + input1_str = cast_type + "INPUT_" + op_num_str + "_1"; + + if (ew.mode == EltwiseMode::ADD) + { + std::vector<std::string> coeff_strings(ew.inputs.size(), ""); + for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) + { + const auto& input = ew.inputs[input_idx]; + if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size()) + { + const float c = coefficients[input.index]; + if (c != 1.0f) + coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*"; + } + } + + input0_str = coeff_strings[0] + input0_str; + input1_str = coeff_strings[1] + input1_str; + } + + + switch (ew.mode) + { + case EltwiseMode::ADD: op += input0_str + " + " + input1_str; break; + case EltwiseMode::SUB: op += input0_str + " - " + input1_str; break; + case EltwiseMode::MUL: op += input0_str + " * " + input1_str; break; + case EltwiseMode::DIV: op += input0_str + " / " + input1_str; break; + case EltwiseMode::MODULU: op += cast_type + "fmod(" + input0_str + ", " + input1_str + ")"; break; + case EltwiseMode::MIN: op += cast_type + "fmin(" + input0_str + ", " + input1_str + ")"; break; + case EltwiseMode::MAX: op += cast_type + "fmax(" + input0_str + ", " + input1_str + ")"; break; + case EltwiseMode::POW: op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break; + case EltwiseMode::SQRT: op += cast_type + "sqrt(" + input0_str + ")"; break; + case EltwiseMode::RSQRT: op += cast_type + "1/sqrt(" + input0_str + ")"; break; + case EltwiseMode::ASSIGN: op += input0_str; break; + default: + break; + } + + std::string opname = "OPERATION" + op_num_str; + jit.AddConstant(MakeJitConstant(opname, op)); + do_eltwise += "\\\n\t" + opname + ";"; + } + + for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++) + do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) + + "[GET_INDEX(INPUT, " + std::to_string(updateInputs[update_input_idx].inputId) + + ")] = tmp" + std::to_string(updateInputs[update_input_idx].tmpId) + ";"; + + do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";"; + + jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise)); + + if (params.layoutBased || params.int8_quantization) + { + jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0])); + } + + /////////////// + return jit; + } + + 
KernelsData EltwiseKernel_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options); + } +}
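Note: the pitch constants emitted by GetJitConstants above follow directly from the fs_bs_yx_bsv4_fsv32 packing: every (x, y) position holds a 32-feature by 4-batch tile, so one step in X spans 128 elements. A sketch of the same arithmetic under that assumption (struct and function names are illustrative, not part of this patch):

    #include <cstddef>

    struct Bsv4Fsv32Pitches
    {
        size_t x, y, b_block, f_block, offset;
    };

    // x_padded / y_padded stand for X().LogicalDimPadded() and
    // Y().LogicalDimPadded(); pad_x / pad_y for the before-padding.
    static Bsv4Fsv32Pitches ComputePitches(size_t x_padded, size_t y_padded,
                                           size_t batch, size_t pad_x, size_t pad_y)
    {
        Bsv4Fsv32Pitches p;
        p.x       = 32 * 4;                        // 32 features * 4 batches per X step
        p.y       = p.x * x_padded;                // one full padded row
        p.b_block = p.y * y_padded;                // next block of 4 batches
        p.f_block = p.b_block * ((batch + 3) / 4); // next block of 32 features
        p.offset  = p.x * pad_x + p.y * pad_y;     // skip the leading padding
        return p;
    }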
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h new file mode 100644 index 000000000..b1fb3e950 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h @@ -0,0 +1,35 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "eltwise_kernel_base.h" + +namespace kernel_selector +{ + class EltwiseKernel_fs_bs_yx_bsv4_fsv32 : public EltwiseKernelBase + { + public: + EltwiseKernel_fs_bs_yx_bsv4_fsv32() : EltwiseKernelBase("eltwise_fs_bs_yx_bsv4_fsv32") {} + virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {} + + virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + virtual ParamsKey GetSupportedKey() const override; + protected: + JitConstants GetJitConstants(const eltwise_params& params) const override; + virtual DispatchData SetDefault(const eltwise_params& params) const override; + }; +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp index 3840f463e..3a7776575 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp @@ -22,8 +22,16 @@ namespace kernel_selector { ParamsKey EltwiseKernelRef::GetSupportedKey() const { ParamsKey k; - k.EnableAllInputDataType(); - k.EnableAllOutputDataType(); + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::INT32); + k.EnableInputDataType(Datatype::INT64); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT64); k.EnableDifferentTypes(); k.EnableAllInputLayout(); k.EnableAllOutputLayout(); @@ -35,6 +43,25 @@ namespace kernel_selector { return k; } + bool EltwiseKernelRef::Validate(const Params& p, const optional_params& o) const + { + if (!EltwiseKernelBase::Validate(p, o)) + { + return false; + } + + const eltwise_params& params = static_cast<const eltwise_params&>(p); + for (size_t i = 0; i < params.inputs.size(); i++) + { + if (params.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) + return false; + } + if (params.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) + return false; + + return true; + } + KernelsData EltwiseKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { return GetCommonKernelsData(params, options); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h index 2249dc8c9..c2ccf054d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h @@ -28,5 +28,8 @@ namespace kernel_selector virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; virtual ParamsKey GetSupportedKey() const override; + protected: + bool Validate(const Params& p, const optional_params& o) const override; + }; }
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp index d71deddfb..cf7565216 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp @@ -17,13 +17,15 @@ #include "eltwise_kernel_selector.h" #include "eltwise_kernel_ref.h" #include "eltwise_kernel_vload8.h" - +#include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h" + namespace kernel_selector { eltwise_kernel_selector::eltwise_kernel_selector() { Attach<EltwiseKernelRef>(); Attach<EltwiseKernel_vload8>(); + Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>(); } KernelsData eltwise_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp index 934bc44cd..5ceb75084 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp @@ -45,6 +45,15 @@ namespace kernel_selector { } const auto& ewParams = static_cast<const eltwise_params&>(params); + + for (size_t i = 0; i < ewParams.inputs.size(); i++) + { + if (ewParams.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) + return false; + } + if (ewParams.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32) + return false; + const auto& output = ewParams.output; const auto count = output.PhysicalSize(); @@ -62,16 +71,16 @@ namespace kernel_selector { //TODO: add support to this implementation when user requests input values updates bool bCheckUpdateInput = true; - if (!ewParams.eltwiseParams.updateInputIds.empty()) + if (!ewParams.updateInputIds.empty()) bCheckUpdateInput = false; //TODO: add support for reading from output buffer and using its values in computation bool bCheckUseOutput = true; - for (size_t op = 0; op < ewParams.eltwiseParams.operations.size(); op++) + for (size_t op = 0; op < ewParams.operations.size(); op++) { - for (size_t input_idx = 0; input_idx < ewParams.eltwiseParams.operations[op].inputs.size(); input_idx++) + for (size_t input_idx = 0; input_idx < ewParams.operations[op].inputs.size(); input_idx++) { - if (ewParams.eltwiseParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER) + if (ewParams.operations[op].inputs[input_idx].mode == EltwiseInputMode::OUTPUT_BUFFER) { bCheckUseOutput = false; break; @@ -114,7 +123,7 @@ namespace kernel_selector { auto& kernel = kd.kernels[0]; kernel.workGroups.global = { std::max(newParams.inputs[0].LogicalSize()/8, (size_t)1), 1, 1 }; kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN); + kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN); kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); kd.estimatedTime = FORCE_PRIORITY_8; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp index d4c9b126c..f126daa94 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp @@ -103,7 +103,7 @@ namespace kernel_selector auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty()); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty()); kd.estimatedTime = runInfo.effiency; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp index 6702ebc69..20e6e8dca 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp @@ -49,7 +49,7 @@ namespace kernel_selector std::unique_ptr<FullyConnectedKernelBase::DispatchData> FullyConnectedKernelBase::SetDefault(const fully_connected_params& params) const { - std::unique_ptr<DispatchData> dispatchData = std::make_unique<DispatchData>(); + std::unique_ptr<DispatchData> dispatchData = std::unique_ptr<DispatchData>(new DispatchData()); dispatchData->fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; // Determine global work sizes. @@ -122,10 +122,10 @@ namespace kernel_selector std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, *runInfo.get(), kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration); + FillCLKernelData(kernel, *runInfo.get(), params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration); kd.estimatedTime = estimated_time; kd.autoTuneIndex = -1; return{ kd }; } -}
\ No newline at end of file +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp index 0f836133b..b98b528a8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp @@ -60,7 +60,7 @@ namespace kernel_selector std::unique_ptr<FullyConnected_bs_f_bsv16_b1::FullyConnectedKernelBase::DispatchData> FullyConnected_bs_f_bsv16_b1::SetDefault(const fully_connected_params& arg) const { - auto run_info = std::make_unique<DispatchData>(*FullyConnectedKernelBase::SetDefault(arg).get()); + auto run_info = std::unique_ptr<DispatchData>(new DispatchData(*FullyConnectedKernelBase::SetDefault(arg))); // Properties of chunk and unit. const char* chunk_type = "uint"; @@ -100,4 +100,4 @@ namespace kernel_selector { return GetCommonKernelsData(params, optParams, DataLayout::bf, {WeightsLayout::os_i_osv16}, FORCE_PRIORITY_5); } -}
\ No newline at end of file +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp index 1ceb3ebc9..b32c8a54e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp @@ -37,7 +37,7 @@ namespace kernel_selector std::unique_ptr<FullyConnected_fb_io_block::FullyConnectedKernelBase::DispatchData> FullyConnected_fb_io_block::SetDefault(const fully_connected_params& arg) const { - auto kd = std::make_unique<DispatchData>(*FullyConnectedKernelBase::SetDefault(arg).get()); + auto kd = std::unique_ptr<DispatchData>(new DispatchData(*FullyConnectedKernelBase::SetDefault(arg))); const auto& output = arg.output; auto batch_size = output.Batch().v; @@ -146,4 +146,4 @@ namespace kernel_selector // return GetCommonKernelsData(params, optParams, DataLayout::fb, WeightsLayout::io, estimated_time); return GetCommonKernelsData(params, optParams, DataLayout::yxfb, { WeightsLayout::yxio }, estimated_time); } -}
\ No newline at end of file +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp new file mode 100644 index 000000000..46e4dea8d --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp @@ -0,0 +1,117 @@ +/* +// Copyright (c) 2016 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "fully_connected_kernel_mmad_batched.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector +{ + ParamsKey FullyConnected_mmad_batched::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT8); + k.EnableInputWeightsType(WeightsType::INT8); + k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableOutputLayout(DataLayout::bf); + k.EnableBiasPerOutput(); + k.EnableBiasPerFeature(); + k.EnableNonBiasTerm(); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableInt8Quantization(); + k.EnableOutputCalibration(); + return k; + } + + bool FullyConnected_mmad_batched::Validate(const Params& p, const optional_params& o) const + { + if (!FullyConnectedKernelBase::Validate(p, o)) + { + return false; + } + + const auto& params = static_cast<const fully_connected_params&>(p); + + // we do not support padded input + if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0) + return false; + + size_t batch = params.inputs[0].Batch().v; + // batch must be a multiple of 8 + if (batch % 8 != 0) + { + return false; + } + + return true; + } + + JitConstants FullyConnected_mmad_batched::GetJitConstants(const fully_connected_params& params, const DispatchData& runInfo) const + { + auto jit = Parent::GetJitConstants(params, runInfo); + + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws1)); + + // pitch for special block format used in this kernel + const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32); + const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8; + jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch)); + + const size_t in_x_pitch = 32 * 4; + const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded(); + const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded(); + const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4); + const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before; + + jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch)); + jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch)); + 
jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset)); + + return jit; + } + + std::unique_ptr<FullyConnected_mmad_batched::Parent::DispatchData> FullyConnected_mmad_batched::SetDefault(const fully_connected_params& params) const + { + auto runInfo = Parent::SetDefault(params); + + constexpr size_t sub_group_size = 8; + + const auto of_maps = params.output.Feature().v; + const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size); + + runInfo->gws0 = params.output.Batch().v / 8; // we process 8 batches in a single WG + runInfo->gws1 = of_threads_per_batch; + runInfo->gws2 = 1; + + runInfo->lws0 = 1; + runInfo->lws1 = sub_group_size; + runInfo->lws2 = 1; + + runInfo->effiency = FORCE_PRIORITY_1; + return std::move(runInfo); + } + + KernelsData FullyConnected_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options, DataLayout::fs_bs_yx_bsv4_fsv32, + { WeightsLayout::os_is_yx_isa8_osv8_isv4 }, FORCE_PRIORITY_1); + } +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h new file mode 100644 index 000000000..61af89f19 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h @@ -0,0 +1,38 @@ +/* +// Copyright (c) 2016 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "fully_connected_kernel_base.h" + +namespace kernel_selector { + + class FullyConnected_mmad_batched : public FullyConnectedKernelBase + { + public: + using Parent = FullyConnectedKernelBase; + + FullyConnected_mmad_batched() : Parent("fully_connected_gpu_mmad_batched") {} + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + + protected: + bool Validate(const Params& p, const optional_params& o) const override; + JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override; + std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const override; + }; +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp index d7c1a1a85..529e1ca33 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp @@ -30,6 +30,7 @@ #include "fully_connected_kernel_bf_io_input_spatial.h" #include "fully_connected_kernel_image_tutorial.h" #include "fully_connected_kernel_MMAD.h" +#include "fully_connected_kernel_mmad_batched.h" namespace kernel_selector { @@ -49,6 +50,7 @@ namespace kernel_selector { Attach<FullyConnected_fb_io_b8_f8>(); Attach<FullyConnected_bf_io_input_spatial>(); Attach<FullyConnectedKernelMMAD>(); + Attach<FullyConnected_mmad_batched>(); } KernelsData fully_connected_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp index 94d0b4751..e40848af7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp @@ -80,7 +80,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty()); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty()); kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); kd.estimatedTime = runInfo.effiency; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp index 55d8bdf88..67328ac99 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp @@ -82,7 +82,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty()); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty()); if (orgParams.use_momentum) { kernel.arguments.push_back({ ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0 }); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp index 9ec0f8e17..bb8380457 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.cpp @@ -24,7 +24,6 @@ namespace kernel_selector ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); - k.EnableInputWeightsType(WeightsType::F16); k.EnableInputWeightsType(WeightsType::F32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp new file mode 100644 index 000000000..12af8a1c5 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp @@ -0,0 +1,98 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "gemm_kernel_base.h" + +#include "kernel_selector_utils.h" + + +namespace kernel_selector +{ + JitConstants GemmKernelBase::GetJitConstants(const gemm_params& params) const + { + JitConstants jit = MakeBaseParamsJitConstants(params); + + jit.AddConstants({ + MakeJitConstant("X1", params.inputs[0].X().v), + MakeJitConstant("Y1", params.inputs[0].Y().v), + MakeJitConstant("X2", params.inputs[1].X().v), + MakeJitConstant("Y2", params.inputs[1].Y().v), + MakeJitConstant("ALPHA", params.alpha), + MakeJitConstant("BETA", params.beta), + MakeJitConstant("TRANSPOSE_INPUT1", params.transpose_input1), + MakeJitConstant("TRANSPOSE_INPUT2", params.transpose_input2), + }); + + if (params.inputs.size() > 2) + { + jit.AddConstants({MakeJitConstant("OUT_BIAS_TERM", true),}); + } + else + jit.AddConstants({ MakeJitConstant("OUT_BIAS_TERM", false)}); + + return jit; + } + + GemmKernelBase::DispatchData GemmKernelBase::SetDefault(const gemm_params& params) const + { + const auto& output = params.output; + + DispatchData kd; + + kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + std::vector<size_t> global{ params.inputs[0].Y().v, params.inputs[1].X().v, output.Batch().v }; + + if (params.transpose_input1 && params.transpose_input2) + global ={ params.inputs[0].X().v, params.inputs[1].Y().v, output.Batch().v }; + else if(params.transpose_input1) + global = { params.inputs[0].X().v, params.inputs[1].X().v, output.Batch().v }; + else if (params.transpose_input2) + global = { params.inputs[0].Y().v, params.inputs[1].Y().v, output.Batch().v }; + + const auto& local = GetOptimalLocalWorkGroupSizes(global); + + kd.gws0 = global[0]; + kd.gws1 = global[1]; + kd.gws2 = global[2]; + + kd.lws0 = local[0]; + kd.lws1 = local[1]; + kd.lws2 = local[2]; 
+ + return kd; + } + + KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const + { + assert(params.GetType() == KernelType::GEMM); + + const auto& prim_params = static_cast<const gemm_params&>(params); + + auto run_info = SetDefault(prim_params); + KernelData k_data = KernelData::Default<gemm_params>(params); + + auto cldnn_jit = GetJitConstants(prim_params); + auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + auto& kernel = k_data.kernels[0]; + FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, false, false, (uint32_t)prim_params.inputs.size()); + + k_data.estimatedTime = estimated_time; + + return { k_data }; + } +}
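Note: GemmKernelBase::SetDefault above launches one work-item per output element; the transpose flags only change which input extents supply the row and column counts. A compact restatement of the four cases (GemmGlobal is an illustrative name, not part of this patch):

    #include <array>
    #include <cstddef>

    // Returns { rows of op(A), cols of op(B), batch }, matching SetDefault:
    // { Y1, X2 } by default, with X1 and/or Y2 substituted when the
    // corresponding input is transposed.
    static std::array<size_t, 3> GemmGlobal(size_t x1, size_t y1,
                                            size_t x2, size_t y2,
                                            size_t batch, bool t1, bool t2)
    {
        if (t1 && t2) return { x1, y2, batch };
        if (t1)       return { x1, x2, batch };
        if (t2)       return { y1, y2, batch };
        return { y1, x2, batch };
    }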
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h new file mode 100644 index 000000000..643a0bec7 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.h @@ -0,0 +1,69 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "common_kernel_base.h" +#include "kernel_selector_params.h" + + +namespace kernel_selector +{ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // gemm_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct gemm_params : public base_params + { + gemm_params() : + base_params(KernelType::GEMM), + alpha(1.0f), + beta(0.0f), + transpose_input1(false), + transpose_input2(false) + {} + + float alpha; + float beta; + bool transpose_input1; + bool transpose_input2; + + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // gemm_optional_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct gemm_optional_params : optional_params + { + gemm_optional_params() + : optional_params(KernelType::GEMM) + { + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // GemmKernelBase + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class GemmKernelBase : public common_kernel_base + { + public: + using common_kernel_base::common_kernel_base; + + using DispatchData = CommonDispatchData; + + protected: + JitConstants GetJitConstants(const gemm_params& params) const; + DispatchData SetDefault(const gemm_params& params) const; + KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp new file mode 100644 index 000000000..585d9d90d --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.cpp @@ -0,0 +1,41 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "gemm_kernel_ref.h" + +namespace kernel_selector +{ + ParamsKey GemmKernelRef::GetSupportedKey() const + { + ParamsKey k; + + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfyx); + + k.EnableBatching(); + + return k; + } + + KernelsData GemmKernelRef::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options, FORCE_PRIORITY_9); + } +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h new file mode 100644 index 000000000..89727597d --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h @@ -0,0 +1,30 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "gemm_kernel_base.h" + + +namespace kernel_selector +{ + class GemmKernelRef : public GemmKernelBase + { + public: + GemmKernelRef() : GemmKernelBase("gemm_ref") {} + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp new file mode 100644 index 000000000..a31f3cb9f --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.cpp @@ -0,0 +1,31 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "gemm_kernel_selector.h" +#include "gemm_kernel_ref.h" + +namespace kernel_selector +{ + gemm_kernel_selector::gemm_kernel_selector() + { + Attach<GemmKernelRef>(); + } + + KernelsData gemm_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const + { + return GetNaiveBestKernel(params, options, KernelType::GEMM); + } +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h new file mode 100644 index 000000000..7a7896afd --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_selector.h @@ -0,0 +1,35 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector +{ + class gemm_kernel_selector : public kernel_selector_base + { + public: + static gemm_kernel_selector &Instance() { + static gemm_kernel_selector instance; + return instance; + } + + gemm_kernel_selector(); + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; + }; +}
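gemm_kernel_selector follows the same shape as the other selectors in this directory: a Meyers singleton reached through Instance(), implementations registered in the constructor via Attach<T>(), and GetBestKernels() dispatching on KernelType::GEMM. The standalone snippet below is a simplified model of that registration pattern built from toy types; it is not the clDNN kernel_selector_base machinery, and the "first non-empty result wins" rule is only an assumption for illustration.

#include <memory>
#include <string>
#include <vector>

// Toy stand-ins; the real Params/KernelsData types live in kernel_selector.
struct ToyParams { };
using ToyKernelsData = std::vector<std::string>;

struct ToyKernelBase
{
    virtual ~ToyKernelBase() = default;
    virtual ToyKernelsData GetKernelsData(const ToyParams&) const = 0;
};

struct ToyGemmRef : ToyKernelBase
{
    ToyKernelsData GetKernelsData(const ToyParams&) const override { return { "gemm_ref" }; }
};

class toy_gemm_selector
{
public:
    // Same Meyers-singleton shape as gemm_kernel_selector::Instance() above.
    static toy_gemm_selector& Instance()
    {
        static toy_gemm_selector instance;
        return instance;
    }

    // Naive selection: return data from the first attached implementation that yields any.
    ToyKernelsData GetBestKernels(const ToyParams& params) const
    {
        for (const auto& impl : implementations_)
        {
            auto kd = impl->GetKernelsData(params);
            if (!kd.empty())
                return kd;
        }
        return {};
    }

private:
    toy_gemm_selector() { Attach<ToyGemmRef>(); }   // mirrors Attach<GemmKernelRef>() in the ctor above

    template <typename T>
    void Attach() { implementations_.push_back(std::make_unique<T>()); }

    std::vector<std::unique_ptr<ToyKernelBase>> implementations_;
};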
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp new file mode 100644 index 000000000..c0dc0851c --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp @@ -0,0 +1,86 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "index_select_kernel_base.h" + +#include "kernel_selector_utils.h" + + +namespace kernel_selector +{ + JitConstants IndexSelectKernelBase::GetJitConstants(const index_select_params& params) + { + JitConstants jit = MakeBaseParamsJitConstants(params); + + jit.AddConstant(MakeJitConstant(toString(params.axis), "")); + + return jit; + } + + IndexSelectKernelBase::DispatchData IndexSelectKernelBase::SetDefault(const index_select_params& params) + { + const auto& output = params.output; + const auto& indices = params.inputs.at(1); + DispatchData kd; + + kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + + std::vector<size_t> global; + if (params.axis == IndexSelectAxis::BATCH) + { + global = { 1, indices.X().v, output.Feature().v }; + } + else if (params.axis == IndexSelectAxis::X || params.axis == IndexSelectAxis::Y) + { + global = { output.Batch().v, indices.X().v, output.Feature().v }; + } + else if(params.axis == IndexSelectAxis::FEATURE) + { + global = { output.Batch().v, indices.X().v, output.Y().v }; + } + const auto& local = GetOptimalLocalWorkGroupSizes(global); + + kd.gws0 = global[0]; + kd.gws1 = global[1]; + kd.gws2 = global[2]; + + kd.lws0 = local[0]; + kd.lws1 = local[1]; + kd.lws2 = local[2]; + + return kd; + } + + KernelsData IndexSelectKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const + { + assert(params.GetType() == KernelType::INDEX_SELECT); + + const auto& prim_params = static_cast<const index_select_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast) + + auto run_info = SetDefault(prim_params); + KernelData k_data = KernelData::Default<index_select_params>(params); + + auto cldnn_jit = GetJitConstants(prim_params); + auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + auto& kernel = k_data.kernels[0]; + FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, false, false, (uint32_t)prim_params.inputs.size()); + + k_data.estimatedTime = estimated_time; + + return {k_data}; + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h new file mode 100644 index 000000000..c7abe43bc --- /dev/null +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h @@ -0,0 +1,61 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "common_kernel_base.h" +#include "kernel_selector_params.h" + + +namespace kernel_selector +{ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // index_select_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct index_select_params : public base_params + { + index_select_params() + : base_params(KernelType::INDEX_SELECT) + {} + + IndexSelectAxis axis = IndexSelectAxis::BATCH; + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // index_select_optional_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct index_select_optional_params : optional_params + { + index_select_optional_params() + : optional_params(KernelType::INDEX_SELECT) + {} + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // IndexSelectKernelBase + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class IndexSelectKernelBase : public common_kernel_base + { + public: + using common_kernel_base::common_kernel_base; + virtual ~IndexSelectKernelBase() {} + + using DispatchData = CommonDispatchData; + + protected: + static JitConstants GetJitConstants(const index_select_params& params); + static DispatchData SetDefault(const index_select_params& params); + KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp new file mode 100644 index 000000000..b5ab92dc1 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.cpp @@ -0,0 +1,58 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "index_select_kernel_ref.h" + + +namespace kernel_selector +{ + ParamsKey IndexSelectKernelRef::GetSupportedKey() const + { + ParamsKey k; + + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::UINT8); + k.EnableInputDataType(Datatype::INT32); + + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); + k.EnableOutputDataType(Datatype::INT32); + + k.EnableInputLayout(DataLayout::bfyx); + k.EnableInputLayout(DataLayout::yxfb); + + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::yxfb); + + k.EnableBatching(); + + k.EnableIndexSelectAxis(IndexSelectAxis::BATCH); + k.EnableIndexSelectAxis(IndexSelectAxis::FEATURE); + k.EnableIndexSelectAxis(IndexSelectAxis::Y); + k.EnableIndexSelectAxis(IndexSelectAxis::X); + + k.EnableDifferentTypes(); + + return k; + } + + KernelsData IndexSelectKernelRef::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options, FORCE_PRIORITY_9); + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h new file mode 100644 index 000000000..3dd16198f --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h @@ -0,0 +1,30 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "index_select_kernel_base.h" + + +namespace kernel_selector +{ + class IndexSelectKernelRef : public IndexSelectKernelBase + { + public: + IndexSelectKernelRef() : IndexSelectKernelBase("index_select_gpu_ref") {} + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp new file mode 100644 index 000000000..3d1693046 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.cpp @@ -0,0 +1,30 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "index_select_kernel_selector.h" +#include "index_select_kernel_ref.h" + +namespace kernel_selector +{ + index_select_kernel_selector::index_select_kernel_selector() + { + Attach<IndexSelectKernelRef>(); + } + + KernelsData index_select_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const + { + return GetNaiveBestKernel(params, options, KernelType::INDEX_SELECT); + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h new file mode 100644 index 000000000..21363f9f5 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_selector.h @@ -0,0 +1,34 @@ +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "kernel_selector.h" + + +namespace kernel_selector +{ + class index_select_kernel_selector : public kernel_selector_base + { + public: + static index_select_kernel_selector &Instance() { + static index_select_kernel_selector instance; + return instance; + } + + index_select_kernel_selector(); + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp index 8176f002b..af6737941 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.cpp @@ -86,7 +86,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 2); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2); kd.estimatedTime = FORCE_PRIORITY_9; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp index de31047d1..bb3f20f7f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_base.cpp @@ -81,7 +81,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, "", false, false, 2); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp index 4907d9992..9165ea692 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_base.cpp @@ -103,7 +103,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnnJit, entryPoint); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entryPoint); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp index a74b21e27..6170abd46 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp @@ -77,7 +77,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnnJit, entryPoint); kernel.workGroups.global = { out.X().v, out.Batch().v, 1 }; - 
kernel.kernelString = GetKernelString(kernelName, jit, entryPoint); + kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 }); if (orgParams.has_cell) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp index 6afb8504c..a068f9ae4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.cpp @@ -22,8 +22,10 @@ namespace kernel_selector { ParamsKey LSTMEltKernelRef::GetSupportedKey() const { ParamsKey k; - k.EnableAllInputDataType(); - k.EnableAllOutputDataType(); + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); k.EnableDifferentTypes(); k.EnableAllInputLayout(); k.EnableAllOutputLayout(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp index 6d2c9bcf0..703008546 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp @@ -35,6 +35,7 @@ namespace kernel_selector } jit.AddConstants({ MakeJitConstant("WEIGHTS", weights)}); + jit.AddConstants({ MakeJitConstant("DIRECTION", params.direction)}); return jit; } @@ -64,7 +65,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnnJit, entryPoint); kernel.workGroups.global = { out.X().v, out.Batch().v, 1 }; - kernel.kernelString = GetKernelString(kernelName, jit, entryPoint); + kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 }); kernel.arguments.push_back({ ArgumentDescriptor::Types::WEIGHTS, 0 }); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h index 6fd517586..e766120e0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h @@ -34,6 +34,7 @@ namespace kernel_selector DataTensor hidden; bool hasBias = false; bool hasHidden = false; + uint32_t direction = 0; void SetBias(const DataTensor& v) { bias = v; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp index 167afcb5f..6484dd951 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.cpp @@ -22,8 +22,10 @@ namespace kernel_selector { ParamsKey 
LSTMGemmKernelRef::GetSupportedKey() const { ParamsKey k; - k.EnableAllInputDataType(); - k.EnableAllOutputDataType(); + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); k.EnableDifferentTypes(); k.EnableAllInputLayout(); k.EnableAllOutputLayout(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp index faea3ea6e..d6e036f40 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp @@ -91,7 +91,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); kd.estimatedTime = estimatedTime; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp index 23de0ff1a..4775a41b7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_base.cpp @@ -82,7 +82,7 @@ namespace kernel_selector auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, finalKernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entry_point); kd.estimatedTime = estimated_time; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp index a50849125..b4e4c04c7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp @@ -82,7 +82,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kernel.arguments.push_back({ ArgumentDescriptor::Types::SCALE_TABLE, 0 }); kd.estimatedTime = estimated_time; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp index 057c4e655..ca6977952 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp @@ -24,8 +24,14 @@ namespace kernel_selector ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); + 
k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::INT32); + k.EnableInputDataType(Datatype::INT64); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT64); k.EnableAllInputLayout(); k.EnableAllOutputLayout(); k.EnableTensorOffset(); @@ -72,7 +78,7 @@ namespace kernel_selector kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] }; kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN); + kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN); kernel.arguments = GetArgsDesc(1, false, false); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp index 13290c42d..9e5a9ad50 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp @@ -129,7 +129,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); if(orgParams.poolType == PoolType::MAX_WITH_ARGMAX) kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp new file mode 100644 index 000000000..5157b4d5e --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.cpp @@ -0,0 +1,83 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h" + +namespace kernel_selector +{ + ParamsKey PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT8); + k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnablePoolType(PoolType::MAX); + k.EnablePoolType(PoolType::AVG); + k.EnablePoolRemainder(PoolRemainder::FLOOR); + k.EnablePoolRemainder(PoolRemainder::CEIL); + k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED); + k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC); + k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC_WITH_PADDING); + k.EnableDifferentTypes(); + return k; + } + + PoolingKernelBase::DispatchData PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::SetDefault(const pooling_params& params) const + { + constexpr int simdSize = 8; + + DispatchData runInfo = PoolingKernelBase::SetDefault(params); + + runInfo.gws0 = params.output.X().v; + runInfo.gws1 = params.output.Y().v; + // we got fs_bs_yx_bsv4_fsv32 format, we process 4 batches and 4 features per workitem + runInfo.gws2 = (RoundUp(params.output.Feature().v, 32) * RoundUp(params.output.Batch().v, 4)) / (4*4); + + runInfo.lws0 = 1; + runInfo.lws1 = 1; + runInfo.lws2 = simdSize; + + return runInfo; + } + + JitConstants PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetJitConstants(const pooling_params& params, DispatchData kd) const + { + auto jit = PoolingKernelBase::GetJitConstants(params, kd); + + const size_t in_x_pitch = 32 * 4; + const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded(); + const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded(); + const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4); + const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before; + + jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch)); + jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch)); + jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch)); + jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset)); + + return jit; + } + + KernelsData PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options, FORCE_PRIORITY_1); + } +}
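The JIT constants emitted above hard-code the addressing of the fs_bs_yx_bsv4_fsv32 layout, in which one block interleaves 32 features and 4 batches, so one step in X advances 32 * 4 elements. Below is a minimal sketch of the same pitch arithmetic with plain integers, assuming the padded dimensions and pads are passed in explicitly (LogicalDimPadded() and pad.before are clDNN tensor accessors that become simple arguments here).

#include <cstddef>

struct BlockPitches
{
    std::size_t x_pitch, y_pitch, b_block_pitch, f_block_pitch, offset;
};

// Mirrors the pitch math in PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::GetJitConstants:
// a block interleaves 32 features and 4 batches, so the X stride is 32 * 4 elements.
static BlockPitches fs_bs_yx_bsv4_fsv32_pitches(std::size_t x_padded,      // X().LogicalDimPadded()
                                                std::size_t y_padded,      // Y().LogicalDimPadded()
                                                std::size_t batch,         // Batch().v
                                                std::size_t pad_x_before,
                                                std::size_t pad_y_before)
{
    BlockPitches p{};
    p.x_pitch       = 32 * 4;
    p.y_pitch       = p.x_pitch * x_padded;
    p.b_block_pitch = p.y_pitch * y_padded;
    p.f_block_pitch = p.b_block_pitch * ((batch + 3) / 4);   // one block per group of 4 batches
    p.offset        = p.x_pitch * pad_x_before + p.y_pitch * pad_y_before;
    return p;
}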
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h new file mode 100644 index 000000000..efb5c67cd --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h @@ -0,0 +1,36 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "pooling_kernel_base.h" + +namespace kernel_selector +{ + class PoolingKerneGPU_fs_bs_yx_bsv4_fsv32 : public PoolingKernelBase + { + public: + PoolingKerneGPU_fs_bs_yx_bsv4_fsv32() : PoolingKernelBase("pooling_gpu_fs_bs_yx_bsv4_fsv32") {} + virtual ~PoolingKerneGPU_fs_bs_yx_bsv4_fsv32() {} + + virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + virtual ParamsKey GetSupportedKey() const override; + DispatchData SetDefault(const pooling_params& params) const override; + protected: + JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override; + + }; +}
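For the dispatch picked in SetDefault above, each work item handles a 4-feature by 4-batch tile of one output (x, y) position, and the third global dimension is grouped into SIMD-8 work groups. A small sketch of that computation follows, with a local round_up helper standing in for clDNN's RoundUp.

#include <cstddef>

static std::size_t round_up(std::size_t v, std::size_t m) { return ((v + m - 1) / m) * m; }

struct Dispatch { std::size_t gws0, gws1, gws2, lws2; };

// Mirrors PoolingKerneGPU_fs_bs_yx_bsv4_fsv32::SetDefault: one work item per output (x, y)
// and per 4-feature x 4-batch tile; lws2 = 8 matches the SIMD size used by the kernel.
static Dispatch fs_bs_yx_bsv4_fsv32_dispatch(std::size_t out_x, std::size_t out_y,
                                             std::size_t out_f, std::size_t out_b)
{
    Dispatch d{};
    d.gws0 = out_x;
    d.gws1 = out_y;
    d.gws2 = (round_up(out_f, 32) * round_up(out_b, 4)) / (4 * 4);
    d.lws2 = 8;
    return d;
}

// Example: out_f = 64, out_b = 8 gives gws2 = (64 * 8) / 16 = 32 work items,
// i.e. four SIMD-8 work groups along the combined feature/batch dimension.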
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp index 5a7d83b47..91ec4d2dc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp @@ -22,6 +22,7 @@ #include "pooling_kernel_gpu_byxf_padding_opt.h" #include "pooling_kernel_gpu_byxf_af32.h" #include "pooling_kernel_gpu_int8_ref.h" +#include "pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h" namespace kernel_selector { @@ -34,6 +35,7 @@ namespace kernel_selector { Attach<PoolingKernelGPUByxfPaddingOpt>(); Attach<PoolingKernelGPUInt8Ref>(); Attach<PoolingKerneGPU_byxf_af32>(); + Attach<PoolingKerneGPU_fs_bs_yx_bsv4_fsv32>(); } KernelsData pooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp index 62420b510..6e5577a84 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.cpp @@ -92,7 +92,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = FORCE_PRIORITY_9; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp index 87ee70660..ba6f7ce1a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.cpp @@ -24,11 +24,15 @@ namespace kernel_selector ParamsKey k; k.EnableInputDataType(Datatype::UINT8); k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::INT32); + k.EnableInputDataType(Datatype::INT64); k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT64); k.EnableOutputDataType(Datatype::UINT8); k.EnableDifferentTypes(); k.EnableAllInputLayout(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp index d050de529..867a3c8b9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp @@ -69,6 +69,14 @@ namespace kernel_selector MakeJitConstant("OUTPUT", output), }; + if (fp16Supported) + { + 
jit.Merge(MakeUnitTypeJitConstants(Datatype::F16)); + } + else + { + jit.Merge(MakeUnitTypeJitConstants(Datatype::F32)); + } return jit; } @@ -185,7 +193,7 @@ namespace kernel_selector auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kernel.arguments = GetArgsDesc(1, false, false); @@ -215,7 +223,7 @@ namespace kernel_selector auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kernel.arguments = GetArgsDesc(1, false, false); if (newParams.mode == MeanSubtractMode::IN_BUFFER) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp index 9ae8d10d9..0462e4a8f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.cpp @@ -87,7 +87,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = FORCE_PRIORITY_9; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp index 2984e2e8f..9c9c760ee 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp @@ -24,8 +24,14 @@ namespace kernel_selector ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::INT32); + k.EnableInputDataType(Datatype::INT64); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT64); k.EnableAllInputLayout(); k.EnableAllOutputLayout(); k.EnableTensorOffset(); @@ -60,7 +66,7 @@ namespace kernel_selector kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] }; kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN); + kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN); kernel.arguments = GetArgsDesc(1, false, false); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp index 0683beb3e..64dde2c32 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp @@ -1,5 +1,5 
@@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2018 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,6 +27,9 @@ namespace kernel_selector { k.EnableOutputDataType(Datatype::F32); k.EnableInputLayout(DataLayout::bfyx); k.EnableOutputLayout(DataLayout::brfyx); + k.EnablePoolType(PoolType::MAX); + k.EnablePoolType(PoolType::AVG); + k.EnablePoolType(PoolType::BILINEAR); k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); @@ -69,7 +72,6 @@ namespace kernel_selector { }); jit.AddConstants({ - MakeJitConstant("MAX_POOL", rp.mode == PoolType::MAX), MakeJitConstant("USE_OLD_SCALE_AND_ROUNDING", rp.groupSize == 0) }); @@ -94,11 +96,11 @@ namespace kernel_selector { auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); kd.estimatedTime = FORCE_PRIORITY_9; return{ kd }; } -}
\ No newline at end of file +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp index 471b61e6d..61edddabf 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp @@ -57,7 +57,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 2); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 2); if (orgParams.use_momentum) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp index 4b9190f18..51b1122d4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.cpp @@ -24,7 +24,6 @@ namespace kernel_selector ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); - k.EnableInputWeightsType(WeightsType::F16); k.EnableInputWeightsType(WeightsType::F32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp new file mode 100644 index 000000000..09b3a0151 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp @@ -0,0 +1,177 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "select_kernel_base.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector +{ + + bool SelectKernelBase::Validate(const Params& p, const optional_params& o) const + { + if (p.GetType() != KernelType::SELECT || + o.GetType() != KernelType::SELECT) + { + return false; + } + + const select_params& params = static_cast<const select_params&>(p); + + if (params.inputs[0].GetDType() != params.inputs[1].GetDType()) + { + return false; + } + + if (params.inputs.size() != 3) + { + return false; + } + + return true; + } + + JitConstants SelectKernelBase::GetJitConstantsCommon(const select_params& params) const + { + JitConstants jit = MakeBaseParamsJitConstants(params); + + std::string inputs_decls; + + for (size_t i = 0; i < params.inputs.size(); i++) + { + std::string const_str = "const"; + + inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", "; + } + + jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls)); + + std::string destType, absType; + + // i8, i8, i8 + // i8, i8, u8 + // u8, u8, i8 + // u8, u8, u8 + if ((params.inputs[2].GetDType() == Datatype::INT8 + || params.inputs[2].GetDType() == Datatype::UINT8) + && (params.inputs[0].GetDType() == Datatype::INT8 + || params.inputs[0].GetDType() == Datatype::UINT8)) + { + jit.AddConstant(MakeJitConstant("MASK", "INPUT_2")); + } + else + { + // x, x, f32 + // x, x, f16 + if (params.inputs[2].GetDType() == Datatype::F32 + || params.inputs[2].GetDType() == Datatype::F16) + { + absType = "fabs"; + } + // f32, f32, i8 + // f32, f32, u8 + // f16, f16, i8 + // f16, f16, u8 + else + { + absType = "abs"; + } + + // f32, f32, x + if (params.inputs[0].GetDType() == Datatype::F32) { + destType = "int"; + } + // f16, f16, x + else if (params.inputs[0].GetDType() == Datatype::F16) { + destType = "short"; + } + // i8, i8, f32 + // i8, i8, f16 + // u8, u8, f32 + // u8, u8, f16 + else + { + destType = "char"; + } + + jit.AddConstant(MakeJitConstant("MASK", "convert_" + destType + "_rtp(" + absType + "(INPUT_2))")); + } + + return jit; + } + + JitConstants SelectKernelBase::GetJitConstants(const select_params& params) const + { + return GetJitConstantsCommon(params); + } + + SelectKernelBase::DispatchData SelectKernelBase::SetDefault(const select_params& params) const + { + DispatchData kd; + + const auto& out = params.output; + + std::vector<size_t> gws; + for (const auto& o : out.GetDims()) + { + gws.push_back(o.v); + } + + for (size_t i = gws.size(); i < 4; i++) + { + gws.push_back(1U); + } + + kd.gws0 = gws[0]; + kd.gws1 = gws[1]; + kd.gws2 = gws[2] * gws[3]; + + auto local = GetOptimalLocalWorkGroupSizes( { kd.gws0, kd.gws1, kd.gws2 } ); + kd.lws0 = local[0]; + kd.lws1 = local[1]; + kd.lws2 = local[2]; + + return kd; + } + + KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const + { + if (!Validate(params, options)) + { + return{}; + } + + KernelData kd = KernelData::Default<select_params>(params); + select_params& newParams = *static_cast<select_params*>(kd.params.get()); + + auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); + auto cldnn_jit = GetJitConstants(newParams); + std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); + + DispatchData runInfo = SetDefault(newParams); + + auto& kernel = kd.kernels[0]; + + kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 }; + kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 }; + + 
kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN); + kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); + + kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; + + return{ kd }; + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h new file mode 100644 index 000000000..c1d48d991 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.h @@ -0,0 +1,62 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "common_kernel_base.h" + +namespace kernel_selector +{ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // select_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct select_params : public base_params + { + select_params() : base_params(KernelType::SELECT) {} + + virtual ParamsKey GetParamsKey() const + { + return base_params::GetParamsKey(); + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // select_optional_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct select_optional_params : optional_params + { + select_optional_params() : optional_params(KernelType::SELECT) {} + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // SelectKernelBase + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class SelectKernelBase : public common_kernel_base + { + public: + using common_kernel_base::common_kernel_base; + virtual ~SelectKernelBase() {} + + using DispatchData = CommonDispatchData; + JitConstants GetJitConstantsCommon(const select_params& params) const; + + protected: + virtual bool Validate(const Params& p, const optional_params& o) const override; + virtual JitConstants GetJitConstants(const select_params& params) const; + virtual DispatchData SetDefault(const select_params& params) const; + KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp new file mode 100644 index 000000000..f7f776c15 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.cpp @@ -0,0 +1,64 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "select_kernel_ref.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { + + ParamsKey SelectKernelRef::GetSupportedKey() const + { + ParamsKey k; + + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::UINT8); + + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); + + k.EnableInputLayout(DataLayout::bfyx); + k.EnableInputLayout(DataLayout::yxfb); + k.EnableInputLayout(DataLayout::byxf); + + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::yxfb); + k.EnableOutputLayout(DataLayout::byxf); + + k.EnableBatching(); + k.EnableDifferentTypes(); + + return k; + } + + bool SelectKernelRef::Validate(const Params& p, const optional_params& o) const + { + if (!SelectKernelBase::Validate(p, o)) + { + return false; + } + + return true; + } + + KernelsData SelectKernelRef::GetKernelsData(const Params& params, const optional_params& options) const + { + return GetCommonKernelsData(params, options); + } +}
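The MASK macro built in SelectKernelBase::GetJitConstantsCommon earlier in this diff encodes a small decision table: an 8-bit mask over 8-bit data is used directly, otherwise the mask is absolute-valued (fabs for a floating-point mask, abs for an integer one) and converted with _rtp rounding to a type whose width matches the data type. The helper below replays that table so the branches are easier to scan; the enum is a local stand-in for kernel_selector::Datatype, not the real type.

#include <string>

enum class Dt { F16, F32, INT8, UINT8 };            // local stand-in for kernel_selector::Datatype

static bool is_i8(Dt d)    { return d == Dt::INT8 || d == Dt::UINT8; }
static bool is_float(Dt d) { return d == Dt::F16  || d == Dt::F32; }

// Mirrors the MASK selection in SelectKernelBase::GetJitConstantsCommon:
// data is the inputs[0]/inputs[1] type, mask is the inputs[2] type.
static std::string mask_expression(Dt data, Dt mask)
{
    if (is_i8(mask) && is_i8(data))
        return "INPUT_2";                            // integer mask over integer data: use as-is

    const std::string abs_fn = is_float(mask) ? "fabs" : "abs";

    std::string dest = "char";                       // i8/u8 data: 8-bit mask value
    if (data == Dt::F32)      dest = "int";          // 32-bit data needs a 32-bit mask value
    else if (data == Dt::F16) dest = "short";        // 16-bit data needs a 16-bit mask value

    return "convert_" + dest + "_rtp(" + abs_fn + "(INPUT_2))";
}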
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h new file mode 100644 index 000000000..a72c0e90a --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h @@ -0,0 +1,35 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "select_kernel_base.h" + +namespace kernel_selector +{ + class SelectKernelRef : public SelectKernelBase + { + public: + SelectKernelRef() : SelectKernelBase("select_gpu_ref") {} + virtual ~SelectKernelRef() {} + + virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + virtual ParamsKey GetSupportedKey() const override; + protected: + bool Validate(const Params& p, const optional_params& o) const override; + + }; +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp new file mode 100644 index 000000000..ec1218166 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.cpp @@ -0,0 +1,31 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "select_kernel_selector.h" +#include "select_kernel_ref.h" + +namespace kernel_selector +{ + select_kernel_selector::select_kernel_selector() + { + Attach<SelectKernelRef>(); + } + + KernelsData select_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const + { + return GetNaiveBestKernel(params, options, KernelType::SELECT); + } +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h new file mode 100644 index 000000000..b3de11649 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_selector.h @@ -0,0 +1,35 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector +{ + class select_kernel_selector : public kernel_selector_base + { + public: + static select_kernel_selector &Instance() { + static select_kernel_selector instance_; + return instance_; + } + + select_kernel_selector(); + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; + }; +}
\ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp index 4f02da734..4d2c36d39 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp @@ -88,7 +88,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kd.estimatedTime = runInfo.effiency; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp index 0a4473714..da816abac 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_base.cpp @@ -74,7 +74,7 @@ namespace kernel_selector auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, runInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); kd.estimatedTime = runInfo.effiency; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp new file mode 100644 index 000000000..37c206d3b --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.cpp @@ -0,0 +1,153 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "tile_kernel_ref.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector +{ + static int32_t GetTileChannelIndex(const tile_params& params) + { + Tensor::DataChannelName name = Tensor::DataChannelName::X; + switch (params.axis) + { + case TileAxis::X: name = Tensor::DataChannelName::X; break; + case TileAxis::Y: name = Tensor::DataChannelName::Y; break; + case TileAxis::FEATURE: name = Tensor::DataChannelName::FEATURE; break; + case TileAxis::BATCH: name = Tensor::DataChannelName::BATCH; break; + default: break; + } + + return DataTensor::Channelndex(params.output.GetLayout(), name); + } + + ParamsKey TileKernelRef::GetSupportedKey() const + { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + return k; + } + + CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const optional_params&) const + { + CommonDispatchData runInfo; + + auto in = params.inputs[0]; + + size_t inner_size = 1; + size_t outer_size = 1; + + const int32_t axis = GetTileChannelIndex(params); + + for (int32_t i = 0; i <= axis; i++) + { + inner_size *= in.GetDims()[i].v; + } + + for (int32_t i = axis+1; i < static_cast<int32_t>(in.GetDims().size()); i++) + { + outer_size *= in.GetDims()[i].v; + } + + if (inner_size > 1) + { + runInfo.gws0 = outer_size; + runInfo.gws1 = inner_size; + runInfo.gws2 = 1; + + runInfo.lws0 = 1; + runInfo.lws1 = 1; + runInfo.lws2 = 1; + } + else + { + runInfo.gws0 = Align(outer_size, 16); + runInfo.gws1 = 1; + runInfo.gws2 = 1; + + runInfo.lws0 = 16; + runInfo.lws1 = 1; + runInfo.lws2 = 1; + } + + runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; + + return runInfo; + } + + JitConstants TileKernelRef::GetJitConstants(const tile_params& params) const + { + JitConstants jit = MakeBaseParamsJitConstants(params); + + auto in = params.inputs[0]; + auto out = params.output; + + size_t inner_size = 1; + size_t outer_size = 1; + size_t axis_pitch = 1; + + const int32_t axis = GetTileChannelIndex(params); + + for (int32_t i = 0; i <= axis; i++) + { + inner_size *= in.GetDims()[i].v; + axis_pitch *= in.GetDims()[i].LogicalDimPadded(); + } + for (int32_t i = axis+1; i < static_cast<int32_t>(in.GetDims().size()); i++) + { + outer_size *= in.GetDims()[i].v; + } + + jit.AddConstant(MakeJitConstant("TILES", params.tiles)); + jit.AddConstant(MakeJitConstant("AXIS_PITCH", axis_pitch)); + jit.AddConstant(MakeJitConstant("OUTER_SIZE", outer_size)); + if (inner_size == 1) + { + jit.AddConstant(MakeJitConstant("OUTPUT_SIZE", out.LogicalSize())); + jit.AddConstant(MakeJitConstant("DENSE", 1)); + } + return jit; + } + + KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_params& options) const + { + assert(params.GetType() == KernelType::TILE); + + KernelData kd = KernelData::Default<tile_params>(params); + tile_params& newParams = *static_cast<tile_params*>(kd.params.get()); + + auto runInfo = SetDefault(newParams, options); + auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); + auto cldnn_jit = GetJitConstants(newParams); + std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); + + auto& kernel = kd.kernels[0]; + + FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point); + + 
kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; + + return{ kd }; + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h new file mode 100644 index 000000000..967dab817 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h @@ -0,0 +1,58 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "common_kernel_base.h" + +namespace kernel_selector +{ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // tile_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct tile_params : public base_params + { + tile_params() : base_params(KernelType::TILE) {} + + TileAxis axis; + int tiles; + + virtual ParamsKey GetParamsKey() const + { + return base_params::GetParamsKey(); + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // tile_optional_params + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct tile_optional_params : optional_params + { + tile_optional_params() : optional_params(KernelType::TILE) {} + }; + + class TileKernelRef : public common_kernel_base + { + public: + TileKernelRef() : common_kernel_base("tile_ref") {} + virtual ~TileKernelRef() {} + + virtual JitConstants GetJitConstants(const tile_params& params) const; + virtual CommonDispatchData SetDefault(const tile_params& params, const optional_params&) const; + virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + virtual ParamsKey GetSupportedKey() const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp new file mode 100644 index 000000000..c0ca49de7 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.cpp @@ -0,0 +1,31 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "tile_kernel_selector.h" +#include "tile_kernel_ref.h" + +namespace kernel_selector { + + tile_kernel_selector::tile_kernel_selector() + { + Attach<TileKernelRef>(); + } + + KernelsData tile_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const + { + return GetNaiveBestKernel(params, options, KernelType::TILE); + } +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h new file mode 100644 index 000000000..c0b10fa10 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_selector.h @@ -0,0 +1,37 @@ +/* +// Copyright (c) 2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector +{ + class tile_kernel_selector : public kernel_selector_base + { + public: + static tile_kernel_selector &Instance() { + static tile_kernel_selector instance_; + return instance_; + } + + tile_kernel_selector(); + + virtual ~tile_kernel_selector() {} + + virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; + }; +} diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp index ea0d89515..889daf8bc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp @@ -76,7 +76,7 @@ namespace kernel_selector kernel.workGroups.global = { out.X().v, out.Y().v, out.Feature().v * out.Batch().v }; kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, ROUND_ROBIN); + kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN); kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp index 46a927483..9037ebc0a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.cpp @@ -15,15 +15,16 @@ */ #include "upsampling_kernel_ref.h" -#include "kernel_selector_utils.h" namespace kernel_selector { ParamsKey UpSamplingKernelRef::GetSupportedKey() const { ParamsKey k; - 
k.EnableAllInputDataType(); - k.EnableAllOutputDataType(); + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); k.EnableDifferentTypes(); k.EnableAllInputLayout(); k.EnableAllOutputLayout();
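For context on the tile reference kernel added above: TileKernelRef::SetDefault and GetJitConstants split the input shape around the tiled axis. The product of all dimensions up to and including the axis gives inner_size (the block that is repeated TILES times for every outer position), the remaining dimensions give outer_size, and axis_pitch is the padded stride of one such block, which the kernel receives as the AXIS_PITCH and OUTER_SIZE JIT constants. The following standalone sketch reproduces that arithmetic for a concrete bfyx shape; it assumes the innermost-first dimension order (x, y, f, b) implied by the Channelndex() call, and all names in it are illustrative rather than part of the clDNN API:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct Dim { size_t v; size_t padded; };  // logical extent and padded extent of one dimension

// inner: product of extents up to and including the tiled axis
// outer: product of extents above the tiled axis
// pitch: padded distance (in elements) between consecutive outer slices
static void SplitAroundAxis(const std::vector<Dim>& dims, int32_t axis,
                            size_t& inner, size_t& outer, size_t& pitch)
{
    inner = outer = pitch = 1;
    for (int32_t i = 0; i <= axis; i++)
    {
        inner *= dims[i].v;
        pitch *= dims[i].padded;
    }
    for (int32_t i = axis + 1; i < static_cast<int32_t>(dims.size()); i++)
    {
        outer *= dims[i].v;
    }
}

int main()
{
    // bfyx input with b = 2, f = 3, y = 4, x = 5 and no padding, tiled along FEATURE.
    // Dims stored innermost-first (assumed), so FEATURE sits at index 2 in this example.
    std::vector<Dim> dims = { {5, 5}, {4, 4}, {3, 3}, {2, 2} };  // x, y, f, b
    size_t inner = 0, outer = 0, pitch = 0;
    SplitAroundAxis(dims, 2, inner, outer, pitch);
    assert(inner == 5 * 4 * 3);  // one f*y*x block is copied TILES times per batch
    assert(outer == 2);          // once per batch
    assert(pitch == 60);         // no padding, so the pitch equals the logical block size
    return 0;
}

SetDefault then maps outer_size and inner_size onto gws0/gws1 (or a 16-aligned one-dimensional range when inner_size is 1), so each work item handles one element of the block being replicated.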