diff options
Diffstat (limited to 'compute/ARMComputeEx/src/runtime/CL/functions')
19 files changed, 376 insertions, 993 deletions
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp deleted file mode 100644 index 2d379cf36..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLArgOperation.h" - -#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -namespace arm_compute -{ - -CLArgOperation::CLArgOperation() -{ - // DO NOTHING -} - -void CLArgOperation::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, - ArgOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), axis, output->info(), op)); - _input = input; - _output = output; - _axis = axis; - _arg_op = op; - // NOTE The argminmax_axis must have no duplication. - _num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = _num_of_kernels - 1; - - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _argop_kernels = - arm_compute::support::cpp14::make_unique<CLArgOperationKernel[]>(_num_of_kernels); - - TensorShape shape{input->info()->tensor_shape()}; - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(_axis[i], 1); - _interm_tensors[i].allocator()->init( - TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()) - .set_data_layout(input->info()->data_layout())); - _interm_tensors[i].allocator()->allocate(); - } - - // Set a vector that is ordered ICLTensors sequentially. - std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ArgMinMax on all kernels - for (size_t i = 0; i < _num_of_kernels; i++) - { - _argop_kernels[i].configure(tensors[i], tensors[i + 1], _axis[i], op); - } -} - -Status CLArgOperation::validate(const ITensorInfo *input, const std::vector<uint32_t> &axis, - const ITensorInfo *output, ArgOperation op) -{ - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(axis[i], 1); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - } - - // Set a vector that is ordered ITensorInfo sequentially. - std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate argminmax only on all kernels - for (size_t i = 0; i < num_of_kernels; i++) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLArgOperationKernel::validate(tensors[i], tensors[i + 1], axis[i], op)); - } - - return Status{}; -} - -void CLArgOperation::run() -{ - for (size_t i = 0; i < _num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_argop_kernels[i]); - } -} - -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp index 92ee69a36..e5122ab8f 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp @@ -48,7 +48,7 @@ using namespace arm_compute; void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, BinaryLogicalOperation op) { - auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); + auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); k->configure(input1, input2, output, op); _kernel = std::move(k); diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp deleted file mode 100644 index b3118f39e..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLCast.h" - -#include "arm_compute/core/CL/kernels/CLCastKernel.h" - -using namespace arm_compute; - -void CLCast::configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype) -{ - auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>(); - k->configure(input, output, input_subtype); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp deleted file mode 100644 index db662505a..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h" - -#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" - -using namespace arm_compute; - -void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp new file mode 100644 index 000000000..3dede0562 --- /dev/null +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2019-2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/UtilsEx.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include <memory> +#include <tuple> + +namespace arm_compute +{ +using namespace arm_compute::misc::shape_calculator; + +CLDirectTransposeConvLayer::CLDirectTransposeConvLayer( + std::shared_ptr<IMemoryManager> memory_manager) // NOLINT + : _memory_group(std::move(memory_manager)), + _scale_f(), + _conv_f(), + _flip_weights(), + _scaled_output(), + _original_weights(nullptr), + _weights_flipped(), + _flip_axis(), + _is_prepared(false) +{ +} + +Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *bias, ITensorInfo *output, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_bottom, + const WeightsInfo &weights_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( + input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); + const DataLayout data_layout = input->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); + + auto out_dims = transposeconv_output_dimensions( + input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), + weights->dimension(idx_h), info, invalid_right, invalid_bottom); + + const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); + + if (bias != nullptr) + { + if (is_data_type_quantized_asymmetric(input->data_type())) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + } + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias); + } + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w], + "Output's width is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h], + "Output's height is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], + "Output's depth is invalid."); + + unsigned int pad_left = 0; + unsigned int pad_right = 0; + unsigned int pad_top = 0; + unsigned int pad_bottom = 0; + const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( + *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, + pad_bottom); + TensorInfo scale_out_info(input->clone() + ->set_is_resizable(true) + .reset_padding() + .set_tensor_shape(scale_out_shape) + .set_data_layout(data_layout)); + const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); + + ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, + conv_info, weights_info)); + + return Status{}; +} + +void CLDirectTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_bottom, + const WeightsInfo &weights_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, info, + invalid_right, invalid_bottom, weights_info); +} + +void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_context, + ICLTensor *input, ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_bottom, + const WeightsInfo &weights_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + + unsigned int pad_left = 0; + unsigned int pad_right = 0; + unsigned int pad_top = 0; + unsigned int pad_bottom = 0; + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; + + const DataLayout data_layout = input->info()->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + _original_weights = weights; + _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); + _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); + _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis); + + auto out_dims = transposeconv_output_dimensions( + input->info()->dimension(idx_w), input->info()->dimension(idx_h), + weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right, + invalid_bottom); + + const TensorShape output_shape = + compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); + + // Output auto initialization if not yet initialized + auto_init_if_empty( + *output->info(), + input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); + + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate( + input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), + info, invalid_right, invalid_bottom)); + + _is_prepared = weights_info.retain_internal_weights(); + + _memory_group.manage(&_scaled_output); + + // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order + // to match output shape + const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( + *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, + pad_right, pad_top, pad_bottom); + + TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), + input->info()->quantization_info()); + scale_out_info.set_data_layout(data_layout); + _scaled_output.allocator()->init(scale_out_info); + + // configure scale function + const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, + DimensionRoundingType::FLOOR); + _scale_f.configure(input, &_scaled_output, upsample_info); + + // Setup the function to convolve the upscaled output + const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); + _conv_f.configure(compile_context, &_scaled_output, &_weights_flipped, bias, output, conv_info, + weights_info); + _scaled_output.allocator()->allocate(); + + // Setup flip axis data + _flip_axis.allocator()->allocate(); + _flip_axis.map(true); + auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer()); + if (weights->info()->data_layout() == DataLayout::NHWC) + { + axis_data[0] = 1; + axis_data[1] = 2; + } + else + { + axis_data[0] = 0; + axis_data[1] = 1; + } + _flip_axis.unmap(); +} + +void CLDirectTransposeConvLayer::run() +{ + prepare(); + + MemoryGroupResourceScope scope_mg(_memory_group); + + _scale_f.run(); + _conv_f.run(); +} + +void CLDirectTransposeConvLayer::prepare() +{ + if (!_is_prepared) + { + ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); + + // Run weights flipping and mark original weights tensor as unused + _weights_flipped.allocator()->allocate(); + _flip_weights.run(); + _original_weights->mark_as_unused(); + + // Prepare convolution + _conv_f.prepare(); + + // Free flipped weights + if (!_weights_flipped.is_used()) + { + _weights_flipped.allocator()->free(); + } + + _is_prepared = true; + } +} +} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp index 3d9a28a48..ae9d8afc6 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp @@ -47,7 +47,7 @@ using namespace arm_compute; void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups) { - auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>(); + auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>(); k->configure(input, output, lookups); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp index f098832b0..01989461e 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp @@ -45,7 +45,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" #include <algorithm> @@ -60,7 +60,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I ARM_COMPUTE_UNUSED(weights); ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output)); + CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); return Status{}; } @@ -68,7 +68,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>(); + auto k = support::cpp14::make_unique<CLTransposeKernel>(); k->configure(input, output); _kernel = std::move(k); } @@ -172,7 +172,8 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen // Quantize input _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); _memory_group.manage(&_quantized_input); _quant_input_kernel.configure(input, &_scale_factor, &_quantized_input); @@ -199,7 +200,7 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); @@ -256,8 +257,9 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor)); // Validate quantization symm8 kernel - const ITensorInfo &quantized_input = TensorInfo( - input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + const ITensorInfo &quantized_input = + TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); ARM_COMPUTE_RETURN_ON_ERROR( CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input)); diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp index 63e291b36..2ff4b9659 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp @@ -46,7 +46,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" #include <algorithm> @@ -141,7 +141,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>(); + auto k = support::cpp14::make_unique<CLTransposeKernel>(); k->configure(input, output); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp index 9aebc473e..157b4d977 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp @@ -53,18 +53,21 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp fc->configure(input_to_use, _weights, _biases, _output); return std::unique_ptr<arm_compute::IFunction>(fc); } - else + else if (kernel_type == KernelType::PREPROCESSED_WEIGHTS) { - assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS); - bool is_hybrid = (input->info()->data_type() == DataType::F32 || input->info()->data_type() == DataType::F16) && - weights->info()->data_type() == DataType::S8; + (weights->info()->data_type() == DataType::S8 || + weights->info()->data_type() == DataType::QASYMM8_SIGNED); if (is_hybrid) { auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager}; + ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info()); + const auto orgin_weights_data_type = weights_info->data_type(); + weights_info->set_data_type(DataType::QASYMM8_SIGNED); fc->configure(input_to_use, _weights, _biases, _output); + weights_info->set_data_type(orgin_weights_data_type); return std::unique_ptr<arm_compute::IFunction>(fc); } else @@ -74,6 +77,11 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp return std::unique_ptr<arm_compute::IFunction>(fc); } } + else + { + throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type"); + } + }(); if (_needs_reshape) diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp deleted file mode 100644 index ca5499dfc..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/MemoryGroup.h" - -namespace arm_compute -{ -using namespace arm_compute::misc::shape_calculator; -using namespace arm_compute::cl_gemm; - -namespace -{ -inline bool is_gemm_reshaped(bool reshape_b_only_on_first_run, GPUTarget gpu_target) -{ - return (get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) && (reshape_b_only_on_first_run); -} -} // namespace - -CLGEMMLowpMatrixMultiplyCoreEx::CLGEMMLowpMatrixMultiplyCoreEx( - std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _mm_midgard_kernel(), _mtx_a_reduction_kernel(), - _mtx_b_reduction_kernel(), _vector_sum_col(), _vector_sum_row(), _a_offset(0), _b_offset(0), - _reshape_b_only_on_first_run(false), _is_prepared(false) -{ -} - -void CLGEMMLowpMatrixMultiplyCoreEx::configure(const ICLTensor *a, const ICLTensor *b, - const ICLTensor *c, ICLTensor *output, - const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - ARM_COMPUTE_UNUSED(c); - ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCoreEx::validate( - a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info)); - - _is_prepared = false; - _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); - _a_offset = a->info()->quantization_info().uniform().offset; - _b_offset = b->info()->quantization_info().uniform().offset; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - - // Set the target for the kernels - _mm_midgard_kernel.set_target(gpu_target); - - // GEMMRHSMatrixInfo rhs_info; - // GEMMLHSMatrixInfo lhs_info; - - // Arguments used by GEMMReshapeInfo - // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, - // n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo - // in order to know how the matrices have been reshaped - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d - ? (a->info()->dimension(1) * a->info()->dimension(2)) - : a->info()->dimension(1); - const unsigned int n = b->info()->dimension(0); - const unsigned int k = a->info()->dimension(0); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - const ICLTensor *matrix_b = b; - // Configure matrix multiply kernel - _mm_midgard_kernel.configure( - a, matrix_b, output, - GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d)); -} - -Status CLGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b, - const ITensorInfo *c, const ITensorInfo *output, - const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b); - ARM_COMPUTE_UNUSED(c); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), - "Matrix A already reshaped is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), - "Matrix B already reshaped is not supported"); - - const ITensorInfo *matrix_a_info = a; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = - reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - bool reshape_matrix_b = is_gemm_reshaped(gemm_info.reshape_b_only_on_first_run(), gpu_target); - - const GEMMReshapeInfo reshape_info = - GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d); - - TensorInfo weights_info(*b); - const ITensorInfo *matrix_b_info = &weights_info; - if (reshape_matrix_b) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(false, - "CLGEMMLowpMatrixMultiplyCoreEx does not support reshape_b"); - } - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernelEx::validate( - matrix_a_info, matrix_b_info, output, reshape_info)); - - return Status{}; -} - -void CLGEMMLowpMatrixMultiplyCoreEx::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Run matrix multiply - CLScheduler::get().enqueue(_mm_midgard_kernel, false); -} - -void CLGEMMLowpMatrixMultiplyCoreEx::prepare() -{ - if (!_is_prepared) - { - _is_prepared = true; - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp index f594d7a2e..e0b833b04 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp @@ -48,7 +48,7 @@ using namespace arm_compute; void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis) { - auto k = arm_compute::support::cpp14::make_unique<CLGatherExKernel>(); + auto k = support::cpp14::make_unique<CLGatherExKernel>(); k->configure(input, indices, output, axis); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp index 27ed8e828..65b89a389 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp @@ -47,7 +47,7 @@ using namespace arm_compute; void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input, ICLTensor *output, ICLTensor *hits) { - auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>(); + auto k = support::cpp14::make_unique<CLHashtableLookupKernel>(); k->configure(lookups, keys, input, output, hits); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp index 80393e8d1..5a7e40839 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp @@ -50,7 +50,7 @@ CLInstanceNormalizationLayerEx::CLInstanceNormalizationLayerEx() {} void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *gamma, ICLTensor *beta, float epsilon) { - auto k = arm_compute::support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>(); + auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>(); k->configure(input, output, gamma, beta, epsilon); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp deleted file mode 100644 index fbb15ab1d..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLPReLU.h" - -#include "arm_compute/core/CL/kernels/CLPReLUKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>(); - k->configure(input, alpha, output); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp deleted file mode 100644 index 6049b7e70..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLRNNLayerEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" - -#include <utility> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -CLRNNLayerEx::CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), - _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), - _gemm_output(), _add_output(), _is_prepared(false) -{ -} - -Status CLRNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *recurrent_weights, const ITensorInfo *bias, - const ITensorInfo *hidden_state, const ITensorInfo *output, - const ActivationLayerInfo &info) -{ - const int idx_width = 0; - const int idx_height = 1; - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, - output); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) != - recurrent_weights->dimension(idx_width)); - ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) != - recurrent_weights->dimension(1)); - ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1); - ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), - hidden_state->tensor_shape()); - - auto shape_info = - TensorInfo(compute_rnn_shape(recurrent_weights, hidden_state->dimension(idx_height)), 1, - input->data_type()); - - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info)); - ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f)); - ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate( - ArithmeticOperation::ADD, &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info)); - - return Status{}; -} - -void CLRNNLayerEx::configure(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *recurrent_weights, const ICLTensor *bias, - ICLTensor *hidden_state, ICLTensor *output, ActivationLayerInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output); - ARM_COMPUTE_ERROR_THROW_ON(CLRNNLayerEx::validate(input->info(), weights->info(), - recurrent_weights->info(), bias->info(), - hidden_state->info(), output->info(), info)); - - const int idx_height = 1; - TensorShape shape = - compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height)); - - _is_prepared = false; - - _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - - // Manage intermediate buffers and configure - _memory_group.manage(&_fully_connected_out); - _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out); - - _memory_group.manage(&_gemm_output); - _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f); - - _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - _memory_group.manage(&_add_output); - - _add_kernel.configure(ArithmeticOperation::ADD, &_fully_connected_out, &_gemm_output, - &_add_output, ConvertPolicy::SATURATE); - - _fully_connected_out.allocator()->allocate(); - _gemm_output.allocator()->allocate(); - - _activation_kernel.configure(&_add_output, hidden_state, info); - _add_output.allocator()->allocate(); - - _copy_kernel.configure(hidden_state, output); -} - -void CLRNNLayerEx::run() -{ - prepare(); - - _memory_group.acquire(); - - _fully_connected_kernel.run(); - _gemm_state_f.run(); - CLScheduler::get().enqueue(_add_kernel); - CLScheduler::get().enqueue(_activation_kernel); - - // copy hidden out to output - CLScheduler::get().enqueue(_copy_kernel); - - _memory_group.release(); -} - -void CLRNNLayerEx::prepare() -{ - if (!_is_prepared) - { - _fully_connected_kernel.prepare(); - _gemm_state_f.prepare(); - - _is_prepared = true; - } -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp index 8ce2d746c..a41e6db60 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp @@ -60,8 +60,7 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo * const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0); // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); + auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); // Create intermediate tensor info TensorShape shape{input->tensor_shape()}; @@ -119,9 +118,8 @@ void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output, const size_t num_of_kernels = axis.size(); const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0); - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _reduce_kernels = - arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); + _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); + _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); // Set a vector that is ordered ICLTensors sequentially. std::vector<ICLTensor *> tensors; diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp deleted file mode 100644 index 7d7b2264b..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h" - -#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" - -using namespace arm_compute; - -void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp index e61746ef2..3215d01a7 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,218 +37,124 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/UtilsEx.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CPP/CPPScheduler.h" +#include <cmath> #include <memory> #include <tuple> using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; -CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _scale_f(), - _conv_f(), - _flip_weights(), - _scaled_output(), - _original_weights(nullptr), - _weights_flipped(), - _is_prepared(false) +CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_manager(std::move(memory_manager)), _function() +{ +} + +void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, + ICLTensor *output, const PadStrideInfo &deconv_info, + unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info) { + configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info, + invalid_right, invalid_bottom, weights_info); +} + +void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, + ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + + switch (CLTransposeConvLayer::get_deconvolution_method(input->info(), weights->info(), nullptr, + output->info(), deconv_info, invalid_right, + invalid_bottom, weights_info)) + { + case DeconvolutionMethod::DIRECT: + { + auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>(); + f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right, + invalid_bottom, weights_info); + _function = std::move(f); + break; + } + case DeconvolutionMethod::GEMM: + { + auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager); + f->configure(compile_context, input, weights, bias, output, deconv_info); + _function = std::move(f); + break; + } + default: + ARM_COMPUTE_ERROR("Not supported."); + break; + } } Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, - const PadStrideInfo &info, unsigned int invalid_right, + const PadStrideInfo &deconv_info, unsigned int invalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); - - const DataLayout data_layout = input->data_layout(); - - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); - - const unsigned int kernel_x = weights->dimension(idx_w); - const unsigned int kernel_y = weights->dimension(idx_h); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_right > kernel_x - 1, - "invalid_right must be smaller than kernel_x"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_bottom > kernel_y - 1, - "inner_border_top must be smaller than kernel_y"); - - // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were added. - auto out_dims = transposeconv_output_dimensions( - input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), - weights->dimension(idx_h), info, invalid_right, invalid_bottom); - - const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); - - if (bias != nullptr) + switch (CLTransposeConvLayer::get_deconvolution_method( + input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)) { - if (is_data_type_quantized_asymmetric(input->data_type())) + case DeconvolutionMethod::DIRECT: { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); + // Validate direct convolution layer + ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate( + input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)); + break; } - else + case DeconvolutionMethod::GEMM: { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + // Validate gemm-based convolution layer + ARM_COMPUTE_RETURN_ON_ERROR( + CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info)); + break; } - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias); + default: + ARM_COMPUTE_ERROR("Not supported."); + break; } - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w], - "Output's width is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h], - "Output's height is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], - "Output's depth is invalid."); - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, - pad_bottom); - TensorInfo scale_out_info(input->clone() - ->set_is_resizable(true) - .reset_padding() - .set_tensor_shape(scale_out_shape) - .set_data_layout(data_layout)); - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - ARM_COMPUTE_RETURN_ON_ERROR( - CLTransposeConvLayerUpsample::validate(input, &scale_out_info, BorderSize(0, 0), info)); - ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, - conv_info, weights_info)); - return Status{}; } -void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, - ICLTensor *output, const PadStrideInfo &info, - unsigned int invalid_right, unsigned int invalid_bottom, - const WeightsInfo &weights_info) +DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method( + const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, + ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; + ARM_COMPUTE_UNUSED(output, bias, weights_info); - const DataLayout data_layout = input->info()->data_layout(); + const DataLayout data_layout = input->data_layout(); const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - _original_weights = weights; - _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(weights, &_weights_flipped); - - // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were - // added. - auto out_dims = transposeconv_output_dimensions( - input->info()->dimension(idx_w), input->info()->dimension(idx_h), - weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right, - invalid_bottom); - - const TensorShape output_shape = - compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); - - // Output auto initialization if not yet initialized - auto_init_if_empty( - *output->info(), - input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayer::validate( - input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); - - _is_prepared = weights_info.retain_internal_weights(); - - _memory_group.manage(&_scaled_output); - - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order - // to match output shape - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - scale_out_info.set_data_layout(data_layout); - _scaled_output.allocator()->init(scale_out_info); - - // configure scale function - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::FLOOR); - _scale_f.configure(input, &_scaled_output, BorderSize(0, 0), upsample_info); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, weights_info); - _scaled_output.allocator()->allocate(); + if (weights->dimension(idx_w) != deconv_info.stride().first || + weights->dimension(idx_h) != deconv_info.stride().second || invalid_right != 0 || + invalid_bottom != 0) + { + return DeconvolutionMethod::DIRECT; + } + + return DeconvolutionMethod::GEMM; } void CLTransposeConvLayer::run() { prepare(); - - _memory_group.acquire(); - - _scale_f.run(); - _conv_f.run(); - - _memory_group.release(); + _function->run(); } -void CLTransposeConvLayer::prepare() -{ - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - // Run weights flipping and mark original weights tensor as unused - _weights_flipped.allocator()->allocate(); - _weights_flipped.map(true); - _original_weights->map(CLScheduler::get().queue(), true); - CPPScheduler::get().schedule(&_flip_weights, Window::DimZ); - _weights_flipped.unmap(); - _original_weights->unmap(CLScheduler::get().queue()); - _original_weights->mark_as_unused(); - - // Prepare convolution - _conv_f.prepare(); - - if (!_weights_flipped.is_used()) - { - _weights_flipped.allocator()->free(); - } - - _is_prepared = true; - } -} +void CLTransposeConvLayer::prepare() { _function->prepare(); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp deleted file mode 100644 index 07feb5a64..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h" - -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/core/CL/ICLTensor.h" - -#include <cmath> -#include <memory> -#include <tuple> - -using namespace arm_compute; - -CLTransposeConvLayerUpsample::CLTransposeConvLayerUpsample() // NOLINT - : _upsample(), - _output(nullptr) -{ -} - -Status CLTransposeConvLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output, - const BorderSize &inner_border, - const PadStrideInfo &info) -{ - return CLTransposeConvLayerUpsampleKernel::validate(input, output, inner_border, info); -} - -void CLTransposeConvLayerUpsample::configure(ICLTensor *input, ICLTensor *output, - const BorderSize &inner_border, - const PadStrideInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - _output = output; - _upsample.configure(input, _output, inner_border, info); -} - -void CLTransposeConvLayerUpsample::run() -{ - _output->map(CLScheduler::get().queue(), true); - if (is_data_type_quantized_asymmetric(_output->info()->data_type())) - { - const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset; - std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero); - } - else - { - memset(_output->buffer(), 0, _output->info()->total_size()); - } - _output->unmap(CLScheduler::get().queue()); - - CLScheduler::get().enqueue(_upsample, false); -} |