diff options
Diffstat (limited to 'compute/ARMComputeEx/src/runtime/NEON')
16 files changed, 92 insertions, 1392 deletions
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp index ff81ff854..2752eb6aa 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp @@ -42,7 +42,7 @@ #include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h" #include "arm_compute/runtime/IRuntimeContext.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -53,7 +53,7 @@ NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT void NEActivationLayerEx::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) { - auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernelEx>(); + auto k = support::cpp14::make_unique<NEActivationLayerKernelEx>(); k->configure(input, output, activation_info); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp index e42c453cf..2fc94b267 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp @@ -42,7 +42,7 @@ #include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h> #include "arm_compute/core/ITensor.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" #include <utility> @@ -53,7 +53,7 @@ template <BinaryLogicalOperation COP> void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2, ITensor *output) { - auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); + auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); k->configure(COP, input1, input2, output); _kernel = std::move(k); } @@ -69,7 +69,7 @@ Status NEBinaryLogicalOperationStatic<COP>::validate(const ITensorInfo *input1, void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output, BinaryLogicalOperation op) { - auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); + auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); k->configure(op, input1, input2, output); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp deleted file mode 100644 index dc5c62061..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NECast.h" - -#include "arm_compute/core/NEON/kernels/NECastKernel.h" -#include "support/ToolchainSupport.h" - -namespace arm_compute -{ -void NECast::configure(const ITensor *input, ITensor *output, SubDataType input_subtype) -{ - auto k = arm_compute::support::cpp14::make_unique<NECastKernel>(); - k->configure(input, output, input_subtype); - _kernel = std::move(k); -} - -Status NECast::validate(const ITensorInfo *input, const ITensorInfo *output, - SubDataType input_subtype) -{ - return NECastKernel::validate(input, output, input_subtype); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp deleted file mode 100644 index 5ec0b8677..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -void NEDepthToSpaceLayerEx::configure(const ITensor *input, ITensor *output, int32_t block_shape) -{ - auto k = arm_compute::support::cpp14::make_unique<NEDepthToSpaceLayerKernelEx>(); - k->configure(input, output, block_shape); - _kernel = std::move(k); -} - -Status NEDepthToSpaceLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - int32_t block_shape) -{ - return NEDepthToSpaceLayerKernelEx::validate(input, output, block_shape); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp index 53fb15081..e0ab3e025 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp @@ -41,13 +41,13 @@ #include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h" #include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" using namespace arm_compute; void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups) { - auto k = arm_compute::support::cpp14::make_unique<NEEmbeddingLookupKernel>(); + auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>(); k->configure(input, output, lookups); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp index f45773251..a123439d9 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp @@ -58,7 +58,7 @@ namespace Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) { ARM_COMPUTE_RETURN_ON_ERROR( - NEGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output)); + NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); return Status{}; } @@ -66,7 +66,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output) { - auto k = arm_compute::support::cpp14::make_unique<NETransposeKernel>(); + auto k = support::cpp14::make_unique<NETransposeKernel>(); k->configure(input, output); _kernel = std::move(k); } @@ -158,7 +158,8 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor // Quantize input _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); _scale_factor.allocator()->init( TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32)); _quant_input_kernel.configure(input, &_quantized_input, &_scale_factor); @@ -186,7 +187,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2); @@ -224,8 +225,9 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); // Validate quantization kernel - const ITensorInfo &quantized_input = TensorInfo( - input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + const ITensorInfo &quantized_input = + TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32); ARM_COMPUTE_RETURN_ON_ERROR( NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor)); diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp index fcac3c7ae..dc6c78478 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp @@ -56,12 +56,17 @@ void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS); bool is_hybrid = input->info()->data_type() == DataType::F32 && - weights->info()->data_type() == DataType::S8; + (weights->info()->data_type() == DataType::S8 || + weights->info()->data_type() == DataType::QASYMM8_SIGNED); if (is_hybrid) { auto fc = new arm_compute::NEFullyConnectedHybridLayer{_memory_manager}; + ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info()); + const auto orgin_weights_data_type = weights_info->data_type(); + weights_info->set_data_type(DataType::QASYMM8_SIGNED); fc->configure(input_to_use, _weights, _biases, _output); + weights_info->set_data_type(orgin_weights_data_type); return std::unique_ptr<arm_compute::IFunction>(fc); } else diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp deleted file mode 100644 index 1290cfd39..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -NEGEMMLowpMatrixMultiplyCoreEx::NEGEMMLowpMatrixMultiplyCoreEx( - std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr), - _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _mtx_a_reduction_kernel(), - _mtx_b_reduction_kernel(), _offset_contribution_kernel(), - _offset_contribution_output_stage_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), - _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), - _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false), - _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), - _fuse_output_stage(false), _flip_signedness(false) -{ -} - -void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *b, const ITensor *c, - ITensor *output, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - ARM_COMPUTE_UNUSED(c); - ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCoreEx::validate( - a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info)); - - const ITensor *matrix_a = a; - const ITensor *matrix_b = b; - GEMMInfo info = gemm_info; - - // Clear state - _mtx_a_reshape_kernel = nullptr; - _mtx_b_reshape_kernel = nullptr; - - // Set internal variables - _a_offset = a->info()->quantization_info().uniform().offset; - _b_offset = b->info()->quantization_info().uniform().offset; - _run_vector_matrix_multiplication = a->info()->dimension(1) < 2; - _reshape_b_only_on_first_run = info.reshape_b_only_on_first_run(); - _is_prepared = false; - _fused_assembly_path = false; - _original_b = b; - - const ITensor *a_to_use = a; - - // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage - if (info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE) - { - _fuse_output_stage = true; - _memory_group.manage(&_mm_result_s32); - TensorInfo info_mm_result_s32(output->info()->tensor_shape(), 1, DataType::S32); - _mm_result_s32.allocator()->init(info_mm_result_s32); - } - -#ifdef __aarch64__ - switch (a->info()->data_type()) - { - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::U8: - case DataType::S8: - { - if (a_to_use->info()->data_type() == DataType::QASYMM8 && - info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) - { - _asm_glue.configure(a_to_use, b, c, output, gemm_info); - _fused_assembly_path = _asm_glue.is_configured(); - } - else - { - _asm_glue.configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, - gemm_info); - } - _assembly_path = _asm_glue.is_configured(); - break; - } - default: - { - ARM_COMPUTE_ERROR("Datatype not supported"); - break; - } - } -#endif /* __aarch64__ */ - if (!(_assembly_path || _run_vector_matrix_multiplication)) - { - matrix_a = &_tmp_a; - matrix_b = &_tmp_b; - - // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / - // 4.0f) ] - TensorInfo a_info(compute_interleaved_shape(*a_to_use->info()), 1, - a_to_use->info()->data_type(), a_to_use->info()->quantization_info()); - // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / - // 16.0f) ] - TensorInfo b_info(compute_transpose1xW_shape(*b->info()), 1, b->info()->data_type(), - b->info()->quantization_info()); - _tmp_a.allocator()->init(a_info); - _tmp_b.allocator()->init(b_info); - _memory_group.manage(&_tmp_a); - if (!_reshape_b_only_on_first_run) - { - _memory_group.manage(&_tmp_b); - } - - // Configure interleave kernel - { - auto k = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>(); - k->configure(a_to_use, &_tmp_a); - _mtx_a_reshape_kernel = std::move(k); - } - - // Configure transpose kernel - { - auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>(); - k->configure(b, &_tmp_b); - _mtx_b_reshape_kernel = std::move(k); - } - } - - if (!_fused_assembly_path) - { - // Initialize matrix B reduction kernel only if _a_offset is not equal to 0 - if (_a_offset != 0) - { - TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32); - - _vector_sum_col.allocator()->init(info_vector_sum_col); - if (!_reshape_b_only_on_first_run) - { - _memory_group.manage(&_vector_sum_col); - } - - // Configure Matrix B reduction kernel - _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a_to_use->info()->dimension(0), false); - } - - // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0 - if (_b_offset != 0) - { - TensorInfo info_vector_sum_row(compute_reductionB_shape(*a_to_use->info()), 1, DataType::S32); - - _vector_sum_row.allocator()->init(info_vector_sum_row); - _memory_group.manage(&_vector_sum_row); - - // Configure matrix A reduction kernel - _mtx_a_reduction_kernel.configure(a_to_use, &_vector_sum_row, a_to_use->info()->dimension(0), - false); - } - - if (_fuse_output_stage) - { - // Configure matrix multiply kernel - if (!_assembly_path) - { - auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>(); - k->configure(matrix_a, matrix_b, &_mm_result_s32); - _mm_kernel = std::move(k); - } - - _offset_contribution_output_stage_kernel.configure( - &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, - _b_offset == 0 ? nullptr : &_vector_sum_row, c, - _flip_signedness ? &_signed_output : output, a->info()->dimension(0), _a_offset, - _b_offset, info.gemmlowp_output_stage()); - } - else - { - // Configure matrix multiply kernel - if (!_assembly_path) - { - auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>(); - k->configure(matrix_a, matrix_b, output); - _mm_kernel = std::move(k); - } - // Configure offset contribution kernel - _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, - _b_offset == 0 ? nullptr : &_vector_sum_row, - a_to_use->info()->dimension(0), _a_offset, _b_offset); - } - } - - // Allocate tensors - if (!_assembly_path && !_run_vector_matrix_multiplication) - { - _tmp_a.allocator()->allocate(); - if (!_reshape_b_only_on_first_run) - { - _tmp_b.allocator()->allocate(); - } - } - - if (!_fused_assembly_path) - { - if (_a_offset != 0 && !_reshape_b_only_on_first_run) - { - _vector_sum_col.allocator()->allocate(); - } - - if (_b_offset != 0) - { - _vector_sum_row.allocator()->allocate(); - } - } - - if (_fuse_output_stage) - { - _mm_result_s32.allocator()->allocate(); - } -} - -Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b, - const ITensorInfo *c, const ITensorInfo *output, - const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::S8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - c != nullptr && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE, - "Bias addition not supported in NEGEMMLowpMatrixMultiplyCoreEx for output S32"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(0) != (b)->dimension(1), - "The product AB is defined only if the number of columns in A is " - "equal to the number of rows in B"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), - "Matrix A already reshaped is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), - "Matrix B already reshaped is not supported"); - - GEMMInfo info = gemm_info; - const ITensorInfo *matrix_a_info = a; - const ITensorInfo *matrix_b_info = b; - - const ITensorInfo *a_to_use = a; - - TensorInfo tmp_a_info{}; - TensorInfo tmp_b_info{}; - TensorInfo mm_result_s32_info{}; - - int32_t a_offset = a->quantization_info().uniform().offset; - int32_t b_offset = b->quantization_info().uniform().offset; - - bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE; - if (fuse_output_stage) - { - auto_init_if_empty( - mm_result_s32_info, - a->clone()->set_tensor_shape(output->tensor_shape()).set_data_type(DataType::S32)); - } - - // Check if we need to run the optimized assembly kernel - bool run_optimised = false; - bool run_optimised_requantized = false; - if (a_to_use->data_type() == DataType::QASYMM8 && - info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) - { - run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info)); - run_optimised_requantized = run_optimised; - } - else - { - run_optimised = bool(NEGEMMAssemblyDispatch::validate( - a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info)); - } - - if (run_optimised) - { - ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0)); - if (info.depth_output_gemm3d() != 0) - { - if (info.reinterpret_input_as_3d()) - { - ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1)); - ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(2) != output->dimension(2)); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1) * output->dimension(2)); - } - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1)); - } - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.reinterpret_input_as_3d(), - "NEGEMM cannot reinterpret the input tensor as 3D"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.depth_output_gemm3d() != 0, - "NEGEMM cannot reinterpret the output tensor as 3D"); - - const bool run_vector_matrix_multiplication = a->dimension(1) < 2; - if (!run_vector_matrix_multiplication) - { - matrix_a_info = &tmp_a_info; - matrix_b_info = &tmp_b_info; - - // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / - // 4.0f) ] - TensorShape shape_tmp_a = a->tensor_shape(); - shape_tmp_a.set(0, a->dimension(0) * 4); - shape_tmp_a.set(1, std::ceil(a->dimension(1) / 4.f)); - - // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width - // / 16.0f) ] - TensorShape shape_tmp_b = b->tensor_shape(); - shape_tmp_b.set(0, b->dimension(1) * 16); - shape_tmp_b.set(1, std::ceil(b->dimension(0) / 16.f)); - - // Validate interleave kernel - auto_init_if_empty(tmp_a_info, a_to_use->clone()->set_tensor_shape(shape_tmp_a)); - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(shape_tmp_b)); - - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMInterleave4x4Kernel::validate(a_to_use, &tmp_a_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(b, &tmp_b_info)); - } - } - - if (!run_optimised_requantized) - { - TensorInfo info_vector_sum_col{}; - TensorInfo info_vector_sum_row{}; - - // Validate matrix B reduction kernel only if _a_offset is not equal to 0 - if (a_offset != 0) - { - info_vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32); - - // Configure Matrix B reduction kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixBReductionKernel::validate( - b, &info_vector_sum_col, a->dimension(0), false)); - } - - // Validate Matrix A reduction kernel only if _b_offset is not equal to 0 - if (b_offset != 0) - { - info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32); - - // Configure matrix A reduction kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate( - a_to_use, &info_vector_sum_row, a->dimension(0), false)); - } - - if (fuse_output_stage) - { - if (!run_optimised) - { - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyKernel::validate( - matrix_a_info, matrix_b_info, &mm_result_s32_info)); - } - - // Validate offset contribution kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionOutputStageKernel::validate( - &mm_result_s32_info, a_offset == 0 ? nullptr : &info_vector_sum_col, - b_offset == 0 ? nullptr : &info_vector_sum_row, c, output, a_offset, b_offset, - info.gemmlowp_output_stage())); - } - else - { - if (!run_optimised) - { - ARM_COMPUTE_RETURN_ON_ERROR( - NEGEMMLowpMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, output)); - } - // Validate offset contribution kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionKernel::validate( - output, a_offset == 0 ? nullptr : &info_vector_sum_col, - b_offset == 0 ? nullptr : &info_vector_sum_row, a_offset, b_offset)); - } - } - return Status{}; -} - -void NEGEMMLowpMatrixMultiplyCoreEx::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Reshape inputs - if (_mtx_a_reshape_kernel) - { - NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY); - } - if (_mtx_b_reshape_kernel && !_reshape_b_only_on_first_run) - { - NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); - } - - // Run GEMM - if (_asm_glue.is_configured()) - { - _asm_glue.run(); - } - else - { - NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY); - } - - if (!_fused_assembly_path) - { - // Run matrix A reduction kernel only if _b_offset is not equal to 0 - if (_b_offset != 0) - { - NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX); - } - - // Run matrix B reduction kernel only if _a_offset is not equal to 0 - if (_a_offset != 0 && !_reshape_b_only_on_first_run) - { - NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); - } - - if (_fuse_output_stage) - { - // Run offset contribution kernel - NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY); - } - else - { - // Run offset contribution kernel - NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY); - } - } -} - -void NEGEMMLowpMatrixMultiplyCoreEx::prepare() -{ - if (!_is_prepared) - { - // Run assembly reshape - if (_asm_glue.is_configured() && _reshape_b_only_on_first_run) - { - ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); - - _asm_glue.prepare(); - _original_b->mark_as_unused(); - } - // Run non-assembly reshape - else if (_mtx_b_reshape_kernel && _reshape_b_only_on_first_run) - { - ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); - - // Run reshape kernel and mark original weights tensor as unused - _tmp_b.allocator()->allocate(); - NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); - _original_b->mark_as_unused(); - } - - // Run matrix B reduction kernel only if _a_offset is not equal to 0 - if (_a_offset != 0 && _reshape_b_only_on_first_run) - { - _vector_sum_col.allocator()->allocate(); - NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); - } - - _is_prepared = true; - } -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp index c8bb88aea..433c35d58 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp @@ -41,7 +41,7 @@ #include "arm_compute/runtime/NEON/functions/NEGatherEx.h" #include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" #include <utility> @@ -49,7 +49,7 @@ namespace arm_compute { void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis) { - auto k = arm_compute::support::cpp14::make_unique<NEGatherKernelEx>(); + auto k = support::cpp14::make_unique<NEGatherKernelEx>(); k->configure(input, indices, output, axis); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp index 078019f4e..52d58accf 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp @@ -41,14 +41,14 @@ #include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h" #include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" using namespace arm_compute; void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, ITensor *output, ITensor *hits) { - auto k = arm_compute::support::cpp14::make_unique<NEHashtableLookupKernel>(); + auto k = support::cpp14::make_unique<NEHashtableLookupKernel>(); k->configure(lookups, keys, input, output, hits); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp deleted file mode 100644 index dac3b849d..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEPReLU.h" - -#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h" -#include "support/ToolchainSupport.h" - -#include <utility> - -using namespace arm_compute; - -void NEPReLU::configure(const ITensor *input, const ITensor *alpha, ITensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<NEPReLUKernel>(); - k->configure(input, alpha, output); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp deleted file mode 100644 index 0e9a5e969..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NERNNLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ -NERNNLayerEx::NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), - _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), - _gemm_output(), _add_output(), _is_prepared(false) -{ -} - -Status NERNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *recurrent_weights, const ITensorInfo *bias, - const ITensorInfo *hidden_state, const ITensorInfo *output, - const ActivationLayerInfo &info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, - output); - - const int idx_width = 0; - const int idx_height = 1; - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) != - recurrent_weights->dimension(idx_width)); - ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) != - recurrent_weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1); - ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), - hidden_state->tensor_shape()); - - auto shape_info = TensorInfo(misc::shape_calculator::compute_rnn_shape( - recurrent_weights, hidden_state->dimension(idx_height)), - 1, input->data_type()); - - ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, weights, bias, &shape_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate( - &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&shape_info, &shape_info, info)); - - return Status{}; -} - -void NERNNLayerEx::configure(const ITensor *input, const ITensor *weights, - const ITensor *recurrent_weights, const ITensor *bias, - ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output); - ARM_COMPUTE_ERROR_THROW_ON(NERNNLayerEx::validate(input->info(), weights->info(), - recurrent_weights->info(), bias->info(), - hidden_state->info(), output->info(), info)); - - const int idx_height = 1; - TensorShape shape = misc::shape_calculator::compute_rnn_shape( - recurrent_weights->info(), hidden_state->info()->dimension(idx_height)); - - _is_prepared = false; - - // Manage intermediate buffers and configure - _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - - // Manage intermediate buffers and configure - _memory_group.manage(&_fully_connected_out); - _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out); - - _memory_group.manage(&_gemm_output); - _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f); - - _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - _memory_group.manage(&_add_output); - - _add_kernel.configure(&_fully_connected_out, &_gemm_output, &_add_output, - ConvertPolicy::SATURATE); - - _fully_connected_out.allocator()->allocate(); - _gemm_output.allocator()->allocate(); - - _activation_kernel.configure(&_add_output, hidden_state, info); - _add_output.allocator()->allocate(); - - _copy_kernel.configure(hidden_state, output); -} - -void NERNNLayerEx::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - _fully_connected_kernel.run(); - - _gemm_state_f.run(); - - NEScheduler::get().schedule(&_add_kernel, Window::DimY); - NEScheduler::get().schedule(&_activation_kernel, Window::DimY); - - // copy hidden out to output - NEScheduler::get().schedule(&_copy_kernel, Window::DimY); -} - -void NERNNLayerEx::prepare() -{ - if (!_is_prepared) - { - _fully_connected_kernel.prepare(); - _gemm_state_f.prepare(); - - _is_prepared = true; - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp deleted file mode 100644 index 116bba3c0..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEReduceMeanEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute; - -NEReduceMeanEx::NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), - _reduction_ops(), _keep_dims() -{ -} - -Status NEReduceMeanEx::validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output) -{ - ARM_COMPUTE_UNUSED(keep_dims); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions()); - - TensorShape out_shape = input->tensor_shape(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - const int input_dims = input->num_dimensions(); - Coordinates axis_local = reduction_axis; - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for (unsigned int i = 0; i < reduction_ops; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3); - ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local[i]) > - input->num_dimensions() - 1); - if (output->total_size() > 0 && keep_dims) - { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1); - } - if (keep_dims) - { - out_shape.set(axis_local[i], 1); - } - else - { - out_shape.remove_dimension(axis_local[i] - i); - } - } - const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info); - - return Status{}; -} - -void NEReduceMeanEx::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _reduction_ops = reduction_axis.num_dimensions(); - _reduction_kernels = - arm_compute::support::cpp14::make_unique<NEReductionOperation[]>(_reduction_ops); - _reduced_outs = - arm_compute::support::cpp14::make_unique<Tensor[]>(_reduction_ops - (keep_dims ? 1 : 0)); - _keep_dims = keep_dims; - - Coordinates axis_local = reduction_axis; - const int input_dims = input->info()->num_dimensions(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - // Perform reduction for every axis - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - TensorShape out_shape = i == 0 ? input->info()->tensor_shape() - : (_reduced_outs.get() + i - 1)->info()->tensor_shape(); - out_shape.set(axis_local[i], 1); - auto in = (i == 0) ? input : (_reduced_outs.get() + i - 1); - - if (i == _reduction_ops - 1 && keep_dims) - { - _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::MEAN_SUM); - } - else - { - _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), - input->info()->data_type(), - input->info()->quantization_info()) - .set_data_layout(output->info()->data_layout())); - _memory_group.manage(_reduced_outs.get() + i); - _reduction_kernels[i].configure(in, _reduced_outs.get() + i, axis_local[i], - ReductionOperation::MEAN_SUM); - } - } - - // Allocate intermediate tensors - for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 1 : 0); ++i) - { - _reduced_outs[i].allocator()->allocate(); - } - - // Configure reshape layer if we want to drop the dimensions - if (!keep_dims) - { - TensorShape out_shape = input->info()->tensor_shape(); - - // We have to sort the reduction axis vectors in order for remove_dimension - // to work properly - std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops); - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - out_shape.remove_dimension(axis_local[i] - i); - } - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape)); - _reshape.configure(_reduced_outs.get() + _reduction_ops - 1, output); - } -} - -void NEReduceMeanEx::run() -{ - _memory_group.acquire(); - - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - _reduction_kernels[i].run(); - } - - if (!_keep_dims) - { - _reshape.run(); - } - _memory_group.release(); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp deleted file mode 100644 index 198bb7672..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ -NESpaceToBatchLayerEx::NESpaceToBatchLayerEx() - : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false) -{ -} - -void NESpaceToBatchLayerEx::configure(const ITensor *input, const ITensor *block_shape, - const ITensor *paddings, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output); - - if (input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) - { - _has_padding = true; - _memset_kernel.configure( - output, PixelValue(0, output->info()->data_type(), output->info()->quantization_info())); - } - _space_to_batch_kernel.configure(input, block_shape, paddings, output); -} - -void NESpaceToBatchLayerEx::configure(const ITensor *input, const int block_shape_x, - const int block_shape_y, const Size2D &padding_left, - const Size2D &padding_right, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - if (input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) - { - _has_padding = true; - _memset_kernel.configure( - output, PixelValue(0, output->info()->data_type(), output->info()->quantization_info())); - } - _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right, - output); -} - -Status NESpaceToBatchLayerEx::validate(const ITensorInfo *input, const ITensorInfo *block_shape, - const ITensorInfo *paddings, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR( - NESpaceToBatchLayerKernel::validate(input, block_shape, paddings, output)); - - return Status{}; -} - -Status NESpaceToBatchLayerEx::validate(const ITensorInfo *input, const int block_shape_x, - const int block_shape_y, const Size2D &padding_left, - const Size2D &padding_right, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(NESpaceToBatchLayerKernel::validate( - input, block_shape_x, block_shape_y, padding_left, padding_right, output)); - - return Status{}; -} - -void NESpaceToBatchLayerEx::run() -{ - // Zero out output only if we have paddings - if (_has_padding) - { - NEScheduler::get().schedule(&_memset_kernel, Window::DimY); - } - NEScheduler::get().schedule(&_space_to_batch_kernel, Window::DimY); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp deleted file mode 100644 index 97697e3ea..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -void NESpaceToDepthLayerEx::configure(const ITensor *input, ITensor *output, int32_t block_shape) -{ - auto k = arm_compute::support::cpp14::make_unique<NESpaceToDepthLayerKernelEx>(); - k->configure(input, output, block_shape); - _kernel = std::move(k); -} - -Status NESpaceToDepthLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - int32_t block_shape) -{ - ARM_COMPUTE_RETURN_ON_ERROR(NESpaceToDepthLayerKernelEx::validate(input, output, block_shape)); - return Status{}; -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp index df0689273..09f178005 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp @@ -1,21 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,14 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/UtilsEx.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -52,20 +33,15 @@ using namespace arm_compute::misc::shape_calculator; namespace arm_compute { + NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), _conv_f(), _upsample_f(), _flip_weights(), - _permute_input(), - _permute_weights(), - _permute_output(), _scaled_output(), _weights_flipped(), - _permuted_input(), - _permuted_weights(), - _permuted_output(), - _is_nchw(false), + _flip_axis(), _original_weights(nullptr), _input(nullptr), _info(), @@ -80,7 +56,7 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, - DataType::QASYMM8); + DataType::QASYMM8, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); const unsigned int width_idx = @@ -95,13 +71,16 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf weights->dimension(height_idx), info, invalid_right, invalid_bottom); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - if (is_data_type_quantized_asymmetric(input->data_type()) && bias) + if (bias != nullptr) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); - } - else if (bias) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + if (is_data_type_quantized_asymmetric(input->data_type())) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + } } if (output->tensor_shape().total_size() > 0) @@ -110,12 +89,12 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) < output_shape.x(), - "Output's dim 0 is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) < output_shape.y(), - "Output's dim 1 is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) < output_shape.z(), - "Output's dim 2 is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), + "Output's width is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), + "Output's height is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), + "Output's depth is invalid."); } unsigned int pad_left = 0; @@ -127,7 +106,6 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf pad_bottom); TensorInfo scale_out_info( input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); - scale_out_info.set_data_layout(input->data_layout()); const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); const unsigned int batches_idx = @@ -149,19 +127,13 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con ITensor *output, const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom) { + // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate( + input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), + info, invalid_right, invalid_bottom)); const DataLayout data_layout = input->info()->data_layout(); - - _input = input; - _original_weights = weights; - _info = info; - _is_prepared = false; - _is_nchw = data_layout == DataLayout::NCHW; - - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const unsigned int height_idx = @@ -173,101 +145,54 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); + + _input = input; + _original_weights = weights; + _info = info; + _is_prepared = false; + + unsigned int pad_left = 0; + unsigned int pad_right = 0; + unsigned int pad_top = 0; + unsigned int pad_bottom = 0; + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; + // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate( - input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); - + _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); _memory_group.manage(&_scaled_output); - if (!_is_nchw) - { - _memory_group.manage(&_permuted_input); - _memory_group.manage(&_permuted_weights); - _memory_group.manage(&_permuted_output); - - // Configure the function to transform the input tensor from NHWC -> NCHW - _permuted_input.info()->set_quantization_info(input->info()->quantization_info()); - _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U)); - _permuted_input.info()->set_data_layout(DataLayout::NCHW); - - // Configure the function to transform the weights tensor from NHWC -> NCHW - _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info()); - _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U)); - _permuted_weights.info()->set_data_layout(DataLayout::NCHW); - - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in - // order to match output shape - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *_permuted_input.info(), *_permuted_weights.info(), info, out_dims, invalid_right, - invalid_bottom, pad_left, pad_right, pad_top, pad_bottom); - - TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(), - _permuted_input.info()->quantization_info()); - scale_out_info.set_data_layout(DataLayout::NCHW); - _scaled_output.allocator()->init(scale_out_info); - - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::CEIL); - _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info); - - _weights_flipped.allocator()->init(*_permuted_weights.info()->clone()); - _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info()); - _flip_weights.configure(&_permuted_weights, &_weights_flipped); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - const auto out_shape = output->info()->tensor_shape(); - TensorShape permuted_out_shape{out_shape[1], out_shape[2], out_shape[0], out_shape[3]}; - TensorInfo permuted_out_info(permuted_out_shape, 1, output->info()->data_type(), - output->info()->quantization_info()); - _permuted_output.allocator()->init(permuted_out_info); - _permuted_output.info()->set_data_layout(DataLayout::NCHW); - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info); - - // Configure the function to transform the convoluted output to NHWC - _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U)); - - _permuted_input.allocator()->allocate(); - _permuted_weights.allocator()->allocate(); - _permuted_output.allocator()->allocate(); - } - else - { - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in - // order to match output shape - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - _scaled_output.allocator()->init(scale_out_info); - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::FLOOR); - _upsample_f.configure(input, &_scaled_output, upsample_info); - - _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(weights, &_weights_flipped); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); - } + _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); + _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); + + // setup the function to convolve the upscaled output + const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); + + const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( + *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, + pad_right, pad_top, pad_bottom); + + const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, + DimensionRoundingType::FLOOR); + + TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), + input->info()->quantization_info()); + scale_out_info.set_data_layout(data_layout); + _scaled_output.allocator()->init(scale_out_info); + + _upsample_f.configure(input, &_scaled_output, upsample_info); + + _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); + + // Setup flip axis data + _flip_axis.allocator()->allocate(); + auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer()); + axis_data[0] = static_cast<uint32_t>(width_idx); + axis_data[1] = static_cast<uint32_t>(height_idx); + _scaled_output.allocator()->allocate(); } @@ -275,22 +200,10 @@ void NETransposeConvLayer::run() { prepare(); - // MemoryGroupResourceScope scope_mg(_memory_group); - - // Permute input - if (!_is_nchw) - { - _permute_input.run(); - } + MemoryGroupResourceScope scope_mg(_memory_group); _upsample_f.run(); _conv_f.run(); - - // Permute output - if (!_is_nchw) - { - _permute_output.run(); - } } void NETransposeConvLayer::prepare() @@ -301,22 +214,12 @@ void NETransposeConvLayer::prepare() // Run weights flipping and mark original weights tensor as unused _weights_flipped.allocator()->allocate(); - // Permute weights - if (!_is_nchw) - { - _permute_weights.run(); - } - NEScheduler::get().schedule(&_flip_weights, Window::DimZ); + _flip_weights.run(); _original_weights->mark_as_unused(); // Prepare convolution _conv_f.prepare(); - if (!_weights_flipped.is_used()) - { - _weights_flipped.allocator()->free(); - } - _is_prepared = true; } } |