diff options
Diffstat (limited to 'compute/ARMComputeEx/src/runtime/CL')
18 files changed, 315 insertions, 26 deletions
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp index b02a48ef2..6b9b0d4b4 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp @@ -45,7 +45,9 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/Utils.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/Utils.h" namespace arm_compute { @@ -66,7 +68,7 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); const unsigned int num_of_stages = - calculate_number_of_stages_only_x_axis(input->dimension(0), axis); + utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis); DataType output_data_type = DataType::S32; TensorInfo not_reshaped_output; @@ -132,7 +134,7 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate( input, &sums_vector[last_stage - 1], &not_reshaped_output, axis, op)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(&not_reshaped_output, output)); + ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayer::validate(&not_reshaped_output, output)); return Status{}; } @@ -140,7 +142,7 @@ void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor * const ReductionOperation &op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); + _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); _reduction_axis = axis; const 
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape( @@ -204,7 +206,8 @@ void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor * &_not_reshaped_output, axis, op); _results_vector[last_stage - 1].allocator()->allocate(); } - _reshape_kernel.configure(&_not_reshaped_output, output); + _reshape_kernel.configure(CLKernelLibrary::get().get_compile_context(), &_not_reshaped_output, + output); _not_reshaped_output.allocator()->allocate(); } @@ -216,6 +219,6 @@ void CLArgMinMaxLayerEx::run() { CLScheduler::get().enqueue(_reduction_kernels_vector[i], false); } - CLScheduler::get().enqueue(_reshape_kernel, false); + _reshape_kernel.run(); } } // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp index e5122ab8f..31c96b080 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp @@ -42,13 +42,14 @@ #include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" using namespace arm_compute; void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, BinaryLogicalOperation op) { - auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); + auto k = std::make_unique<CLBinaryLogicalOpKernel>(); k->configure(input1, input2, output, op); _kernel = std::move(k); @@ -57,7 +58,7 @@ void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTenso ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? 
input1 : input2; if (broadcasted_info->info()->dimension(0) == 1) { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + _border_handler->configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); } } } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp index c7d0ac8e2..96f9c17a9 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp @@ -46,7 +46,7 @@ using namespace arm_compute; void CLCastBool::configure(ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique<CLCastBoolKernel>(); + auto k = std::make_unique<CLCastBoolKernel>(); k->configure(input, output); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp index 6359b4bcb..464f60dee 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp @@ -45,6 +45,8 @@ #include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" + #include <memory> #include <tuple> diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp index ae9d8afc6..003ec8042 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp @@ -39,7 +39,6 @@ */ #include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h" - #include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" using namespace arm_compute; @@ -47,7 +46,7 @@ using namespace arm_compute; 
void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups) { - auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>(); + auto k = std::make_unique<CLEmbeddingLookupKernel>(); k->configure(input, output, lookups); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp index 79d0929a9..af936e873 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp @@ -45,7 +45,6 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/MemorySupport.h" #include <algorithm> @@ -68,7 +67,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output) { - auto k = support::cpp14::make_unique<CLTransposeKernel>(); + auto k = std::make_unique<CLTransposeKernel>(); k->configure(input, output); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp index 13d3acbac..c6a88d340 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp @@ -42,11 +42,11 @@ #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/MemorySupport.h" + +#include 
"support/Cast.h" #include <algorithm> @@ -141,7 +141,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output) { - auto k = support::cpp14::make_unique<CLTransposeKernel>(); + auto k = std::make_unique<CLTransposeKernel>(); k->configure(input, output); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp index ac6982e6f..cda784541 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp @@ -19,6 +19,7 @@ #include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h> #include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h> #include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h> +#include "src/core/helpers/AutoConfiguration.h" using namespace arm_compute; diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp new file mode 100644 index 000000000..cd7409417 --- /dev/null +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Copyright (c) 2017-2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLKernelLibraryEx.h" +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "support/StringSupport.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/AccessWindowStatic.h" +#include "src/core/helpers/WindowHelpers.h" + +using namespace arm_compute; + +namespace +{ +Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases) +{ + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(accum); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum); + ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() != 1); + + return Status{}; +} + +std::pair<Status, Window> +validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases, GPUTarget gpu_target, + unsigned int &num_elems_processed_per_iteration) +{ + // Select the vector size to use (8 for Bifrost; 16 for Midgard). + bool is_gpu_bifrost = + gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G76, GPUTarget::G51, + GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::G52, GPUTarget::G52LIT); + num_elems_processed_per_iteration = is_gpu_bifrost ? 
8 : 16; + + // Configure kernel window + Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration)); + + AccessWindowStatic biases_access( + biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration), + biases->dimension(1)); + AccessWindowHorizontal accum_access(accum, 0, num_elems_processed_per_iteration); + + bool window_changed = update_window_and_padding(win, biases_access, accum_access); + + Status err = (window_changed) + ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") + : Status{}; + return std::make_pair(err, win); +} +} // namespace + +CLGEMMMatrixAccumulateBiasesKernel::CLGEMMMatrixAccumulateBiasesKernel() + : _accum(nullptr), _biases(nullptr) +{ +} + +void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases) +{ + configure(CLKernelLibrary::get().get_compile_context(), accum, biases); +} + +void CLGEMMMatrixAccumulateBiasesKernel::configure(const CLCompileContext &compile_context, + ICLTensor *accum, const ICLTensor *biases) +{ + ARM_COMPUTE_UNUSED(compile_context); + // Perform validate step + ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info())); + + _biases = biases; + _accum = accum; + + // Get the target gpu + GPUTarget gpu_target = get_target(); + unsigned int vector_size = 0; + + // Configure kernel window + auto win_config = + validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + ICLKernel::configure_internal(win_config.second); + + // Add build options + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type())); + build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size)); + + // Create kernel + _kernel = static_cast<cl::Kernel>( + 
CLKernelLibraryEx::get().create_kernel("gemm_accumulate_biases", build_opts.options())); +} + +Status CLGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum, + const ITensorInfo *biases, GPUTarget gpu_target) +{ + unsigned int num_elems_processed_per_iteration = 0; + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(), + biases->clone().get(), gpu_target, + num_elems_processed_per_iteration) + .first); + + return Status{}; +} + +void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); + + Window accum_slice = window.first_slice_window_2D(); + + Window biases_slice(accum_slice); + biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1)); + + // Run kernel + do + { + // Set arguments + unsigned int idx = 0; + add_2D_tensor_argument(idx, _accum, accum_slice); + add_1D_tensor_argument(idx, _biases, biases_slice); + + enqueue(queue, *this, accum_slice, lws_hint()); + } while (window.slide_window_slice_2D(accum_slice)); +} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp index e0b833b04..f380e3e2c 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp @@ -41,6 +41,8 @@ #include "arm_compute/runtime/CL/functions/CLGatherEx.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "src/core/CL/kernels/CLGatherKernel.h" + #include "arm_compute/core/CL/kernels/CLGatherExKernel.h" using namespace arm_compute; @@ -48,7 +50,7 @@ using namespace arm_compute; void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis) { - auto k = support::cpp14::make_unique<CLGatherExKernel>(); + auto k = 
std::make_unique<CLGatherExKernel>(); k->configure(input, indices, output, axis); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp index 65b89a389..9896abd4b 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp @@ -47,7 +47,7 @@ using namespace arm_compute; void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input, ICLTensor *output, ICLTensor *hits) { - auto k = support::cpp14::make_unique<CLHashtableLookupKernel>(); + auto k = std::make_unique<CLHashtableLookupKernel>(); k->configure(lookups, keys, input, output, hits); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp index 5a7e40839..ca45a57f8 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp @@ -50,7 +50,7 @@ CLInstanceNormalizationLayerEx::CLInstanceNormalizationLayerEx() {} void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *gamma, ICLTensor *beta, float epsilon) { - auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>(); + auto k = std::make_unique<CLInstanceNormalizationLayerKernelEx>(); k->configure(input, output, gamma, beta, epsilon); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp index 28e5bc0da..2bdc451b3 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp @@ -46,7 +46,7 @@ using namespace arm_compute; 
void CLNeg::configure(ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>(); + auto k = std::make_unique<CLNegKernel>(); k->configure(input, output); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp index aa9f32ec6..759a19ff3 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp @@ -41,7 +41,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLOneHotKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/MemorySupport.h" + namespace arm_compute { CLOneHot::CLOneHot() : _memset_kernel(), _onehot_kernel(), _has_to_memset(false) {} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp new file mode 100644 index 000000000..4d940e966 --- /dev/null +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2019-2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h" +#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h" + +namespace arm_compute +{ +CLPadLayerEx::CLPadLayerEx() + : _pad_kernel(std::make_unique<CLPadLayerKernelEx>()), + _copy_kernel(std::make_unique<opencl::kernels::ClCopyKernel>()), _perform_pad(false) +{ +} + +void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, + PixelValue constant_value, PaddingMode mode) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, + mode); +} + +void CLPadLayerEx::configure(const CLCompileContext &compile_context, ICLTensor *input, + ICLTensor *output, const PaddingList &padding, + PixelValue constant_value, PaddingMode mode) +{ + ARM_COMPUTE_ERROR_THROW_ON( + validate(input->info(), output->info(), padding, constant_value, mode)); + + _perform_pad = std::any_of(padding.begin(), padding.end(), + [](PaddingInfo info) { return info.first > 0 || info.second > 0; }); + + if (_perform_pad) + { + _pad_kernel->configure(compile_context, input, output, padding, constant_value, mode); + } + else + { + Window copy_window = Window(); + copy_window.use_tensor_dimensions(output->info()->tensor_shape()); + // Copy the input to the whole output if no padding is applied + _copy_kernel->configure(compile_context, input->info(), output->info(), &copy_window); + } +} +Status CLPadLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, + const PaddingList &padding, PixelValue constant_value, + PaddingMode mode) +{ + bool perform_pad = std::any_of(padding.begin(), padding.end(), [](PaddingInfo info) { + return info.first > 0 || info.second > 0; + }); + + if (perform_pad) + { + ARM_COMPUTE_RETURN_ON_ERROR( + CLPadLayerKernelEx::validate(input, output, padding, constant_value, mode)); + } + else + { + ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCopyKernel::validate(input, output)); + } + return Status{}; +} +void CLPadLayerEx::run() 
+{ + if (_perform_pad) + { + CLScheduler::get().enqueue(*_pad_kernel); + } + else + { + CLScheduler::get().enqueue(*_copy_kernel); + } +} +} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp index c246041bb..6740835a8 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp @@ -61,7 +61,7 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo * ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1); // Create temporary tensor infos - auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); + auto interm_tensors = std::make_unique<TensorInfo[]>(num_of_interm_tensors); // Create intermediate tensor info TensorShape shape{input->tensor_shape()}; @@ -124,8 +124,8 @@ void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output, throw std::runtime_error("CLReduceOperation: there is no axis to reduce"); } - _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); + _interm_tensors = std::make_unique<CLTensor[]>(num_of_interm_tensors); + _reduce_kernels = std::make_unique<CLReduceOperationKernel[]>(num_of_kernels); // Set a vector that is ordered ICLTensors sequentially. 
std::vector<ICLTensor *> tensors; diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp index 12c0aa829..73f5f6eb1 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp @@ -47,6 +47,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" #include <cassert> using namespace arm_compute; diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp index 0754fd813..f3f093c18 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp @@ -79,7 +79,7 @@ void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, IC { case DeconvolutionMethod::DIRECT: { - auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>(); + auto f = std::make_unique<CLDirectTransposeConvLayer>(); f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info); _function = std::move(f); @@ -87,7 +87,7 @@ void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, IC } case DeconvolutionMethod::GEMM: { - auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager); + auto f = std::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager); f->configure(compile_context, input, weights, bias, output, deconv_info); _function = std::move(f); break; |