From d6b371e095d737922187a518b8faba1ef6f3a2b1 Mon Sep 17 00:00:00 2001 From: Chunseok Lee Date: Thu, 29 Oct 2020 13:12:50 +0900 Subject: Imported Upstream version 0.4 --- .../src/runtime/NEON/NEFunctionsEx.cpp | 20 - .../runtime/NEON/functions/NEActivationLayerEx.cpp | 66 --- .../NEON/functions/NEBinaryLogicalOperation.cpp | 86 ---- .../src/runtime/NEON/functions/NECastBool.cpp | 57 --- .../runtime/NEON/functions/NEEmbeddingLookup.cpp | 53 --- .../NEON/functions/NEFullyConnectedHybridLayer.cpp | 300 ------------- .../NEON/functions/NEFullyConnectedLayerEx.cpp | 494 --------------------- .../functions/NEFullyConnectedReshapingLayer.cpp | 96 ---- .../src/runtime/NEON/functions/NEGatherEx.cpp | 63 --- .../runtime/NEON/functions/NEHashtableLookup.cpp | 61 --- .../functions/NEInstanceNormalizationLayerEx.cpp | 113 ----- .../src/runtime/NEON/functions/NEOneHot.cpp | 59 --- .../runtime/NEON/functions/NEReduceOperation.cpp | 182 -------- .../src/runtime/NEON/functions/NEReduceSum.cpp | 181 -------- .../NEON/functions/NEReductionOperationEx.cpp | 173 -------- .../NEON/functions/NETransposeConvLayer.cpp | 242 ---------- 16 files changed, 2246 deletions(-) delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp delete mode 100644 compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp (limited to 'compute/ARMComputeEx/src/runtime/NEON') diff --git a/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp deleted file mode 100644 index 80fbf359d..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arm_compute/runtime/NEON/NEFunctionsEx.h" - -// NOTE This empty file aims to validate "NEFunctionsEx.h". -// DO NOT REMOVE this file. diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp deleted file mode 100644 index 2752eb6aa..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEActivationLayerEx.h" - -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h" -#include "arm_compute/runtime/IRuntimeContext.h" -#include "support/MemorySupport.h" - -namespace arm_compute -{ -NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT - : INESimpleFunctionNoBorder(ctx) -{ -} -void NEActivationLayerEx::configure(ITensor *input, ITensor *output, - ActivationLayerInfo activation_info) -{ - auto k = support::cpp14::make_unique(); - k->configure(input, output, activation_info); - _kernel = std::move(k); -} - -Status NEActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfo &act_info) -{ - return NEActivationLayerKernelEx::validate(input, output, act_info); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp deleted file mode 100644 index 2fc94b267..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h" -#include - -#include "arm_compute/core/ITensor.h" -#include "support/MemorySupport.h" - -#include - -namespace arm_compute -{ - -template -void NEBinaryLogicalOperationStatic::configure(ITensor *input1, ITensor *input2, - ITensor *output) -{ - auto k = support::cpp14::make_unique(); - k->configure(COP, input1, input2, output); - _kernel = std::move(k); -} - -template -Status NEBinaryLogicalOperationStatic::validate(const ITensorInfo *input1, - const ITensorInfo *input2, - const ITensorInfo *output) -{ - return NEBinaryLogicalOperationKernel::validate(COP, input1, input2, output); -} - -void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output, - BinaryLogicalOperation op) -{ - auto k = support::cpp14::make_unique(); - k->configure(op, input1, input2, output); - _kernel = std::move(k); -} - -Status NEBinaryLogicalOperation::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, BinaryLogicalOperation op) -{ - return NEBinaryLogicalOperationKernel::validate(op, input1, input2, output); -} - -// Supported Specializations -template class NEBinaryLogicalOperationStatic; -template class NEBinaryLogicalOperationStatic; -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp deleted file mode 100644 index 6ad3e1b12..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NECastBool.h" - -#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h" -#include "support/MemorySupport.h" - -using namespace arm_compute; - -void NECastBool::configure(const ITensor *input, ITensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output); - _kernel = std::move(k); -} - -Status NECastBool::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - return NECastBoolKernel::validate(input, output); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp deleted file mode 100644 index e0ab3e025..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h" - -#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" -#include "support/MemorySupport.h" - -using namespace arm_compute; - -void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups) -{ - auto k = support::cpp14::make_unique(); - k->configure(input, output, lookups); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp deleted file mode 100644 index a123439d9..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -#include -#include - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -namespace -{ -Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) -{ - ARM_COMPUTE_RETURN_ON_ERROR( - NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); - - return Status{}; -} -} // namespace - -void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output) -{ - auto k = support::cpp14::make_unique(); - k->configure(input, output); - _kernel = std::move(k); -} - -Status NEFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *input, - const ITensorInfo *output) -{ - return NETransposeKernel::validate(input, output); -} - -NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer( - std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(), - _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(), - _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false), - _accumulate_biases(false), _is_prepared(false) -{ -} - -void NEFullyConnectedHybridLayer::configure_mm(const ITensor *input, const ITensor *weights, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); - - // Configure gemmlowp function - _mm_gemmlowp.configure(input, weights, nullptr, output); -} - -void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor *weights, - const ITensor *biases, ITensor *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - // Perform validate step - ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); - - _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - _accumulate_biases = false; - _original_weights = weights; - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr) - { - _accumulate_biases = true; - - // Configure accumulate biases kernel - _accumulate_biases_kernel.configure(output, biases); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensor *weights_to_use = weights; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->info()->dimension(1) > 1; - bool _is_fc_after_conv; - if (is_batched_fc_layer) - { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); - } - else - { - _is_fc_after_conv = input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1; - } - ARM_COMPUTE_ERROR_ON_MSG(_is_fc_after_conv, - "NEFullyConnectedHybridLayer does not support after conv"); - (void)_is_fc_after_conv; - - // Reshape weights if needed - if (!_are_weights_reshaped) - { - // Reshape the weights - _reshape_weights_output.allocator()->init( - weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights->info()))); - _reshape_weights_function.configure(weights_to_use, &_reshape_weights_output); - weights_to_use = &_reshape_weights_output; - } - - // Quantize input - _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); - _scale_factor.allocator()->init( - TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32)); - _quant_input_kernel.configure(input, &_quantized_input, &_scale_factor); - - // GEMM - _gemmlowp_output.allocator()->init( - output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); - configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output); - - // Multiply scale - _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output, - weights->info()->quantization_info().uniform().scale); - - _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; - - _quantized_input.allocator()->allocate(); - _scale_factor.allocator()->allocate(); - _gemmlowp_output.allocator()->allocate(); -} - -Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); - ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2); - - bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - - const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAccumulateBiasesKernel::validate(output, biases)); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensorInfo *weights_to_use = weights; - - if (!weights_reshaped) - { - // Validate reshape weights kernel - ARM_COMPUTE_RETURN_ON_ERROR( - NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights)); - weights_to_use = &reshaped_weights; - } - - // Fully Connected layer after a Fully Connected Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); - - // Validate quantization kernel - const ITensorInfo &quantized_input = - TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); - const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32); - ARM_COMPUTE_RETURN_ON_ERROR( - NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor)); - - const ITensorInfo &gemmlowp_output = TensorInfo( - output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); - // Validate matrix multiply kernel - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output)); - - ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate( - &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale)); - - return Status{}; -} - -void NEFullyConnectedHybridLayer::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Quantize input - NEScheduler::get().schedule(&_quant_input_kernel, Window::DimY); - - // Run matrix multiply - _mm_gemmlowp.run(); - - // Multiply scale factor - NEScheduler::get().schedule(&_multiply_scale_kernel, Window::DimY); - - // Accumulate biases if provided - if (_accumulate_biases) - { - NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY); - } -} - -void NEFullyConnectedHybridLayer::prepare() -{ - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - auto release_unused = [](Tensor *w) { - if (!w->is_used()) - { - w->allocator()->free(); - } - }; - - // Reshape of the weights (happens only once) - if (!_are_weights_reshaped) - { - // Run reshape weights kernel and mark weights as unused - _reshape_weights_output.allocator()->allocate(); - _reshape_weights_function.run(); - - _are_weights_reshaped = true; - // We can not release _original_weights because it can be used in other nodes - } - - // Prepare GEMM prepare and release unused weights - _mm_gemmlowp.prepare(); - - // Release reshaped weights if unused - release_unused(&_reshape_weights_output); - - _is_prepared = true; - } -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp deleted file mode 100644 index cb7557a5a..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -#include -#include - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -namespace -{ -Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) -{ - if (is_data_type_quantized_asymmetric(input.data_type())) - { - // Since we need negative offsets for computing convolution, we need to change - // QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale, - -input.quantization_info().uniform().offset); - const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale, - -weights.quantization_info().uniform().offset); - - // Validate gemmlowp function - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate( - &input.clone()->set_quantization_info(input_quantization_info), - &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output)); - } - else - { - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate( - &input, &weights, nullptr, &output, 1.f, 0.0f, - GEMMInfo(false, false, false /* Reshape weights only for the first run */))); - } - - return Status{}; -} -} // namespace - -NEFullyConnectedLayerEx::NEFullyConnectedLayerEx(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(), - _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(), - _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(), - _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr), - _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), - _accumulate_biases(false), _is_quantized(false), _is_prepared(false) -{ -} - -void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *weights, - ITensor *output) -{ - if (_is_quantized) - { - // Since we need negative offsets for computing convolution, we need to change - // QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = input->info()->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - - input->info()->set_quantization_info(QuantizationInfo( - input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); - weights->info()->set_quantization_info(QuantizationInfo( - weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); - - // Configure gemmlowp function - _mm_gemmlowp.configure(input, weights, nullptr, output); - - // Revert back QuantizatioInfo as input and weights could be used in other fully connected - // layers - input->info()->set_quantization_info(input_quantization_info); - weights->info()->set_quantization_info(weights_quantization_info); - } - else - { - // Configure matrix multiply kernel - _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, - GEMMInfo(false, false, false /* Reshape weights only for the first run */)); - } -} - -void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITensor *weights, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON( - (weights->info()->dimension(1) != - (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); - - // If the fully connected layer is called after a convolution layer, the input tensor must be - // linearized - - // Initialize output tensor for flatten - TensorShape shape_flatten = compute_flatten_shape(input->info()); - _flatten_output.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - shape_flatten)); - - // Configure flatten kernel - _memory_group.manage(&_flatten_output); - _flatten_kernel.configure(input, &_flatten_output); - - // Configure matrix multiply kernel - configure_mm(&_flatten_output, weights, output); - - // Allocate the output tensor for flatten once all the configure methods have been called - _flatten_output.allocator()->allocate(); -} - -void NEFullyConnectedLayerEx::configure_fc_fc(const ITensor *input, const ITensor *weights, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); - - // Configure matrix multiply kernel - configure_mm(input, weights, output); -} - -void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *weights, - const ITensor *biases, ITensor *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - // Perform validate step - ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); - - _are_weights_converted = true; - _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - _is_fc_after_conv = true; - _accumulate_biases = false; - _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); - _original_weights = weights; - - // Configure gemmlowp output - if (_is_quantized) - { - _gemmlowp_output.allocator()->init( - output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::S32)); - } - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr && !_is_quantized) - { - _accumulate_biases = true; - - // Configure accumulate biases kernel - _accumulate_biases_kernel.configure(output, biases); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensor *weights_to_use = weights; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->info()->dimension(1) > 1; - if (is_batched_fc_layer) - { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); - } - else - { - _is_fc_after_conv = input->info()->num_dimensions() > 1; - } - - // Reshape weights if needed - if (!_are_weights_reshaped) - { - // Reshape the weights - _reshape_weights_function.configure(weights, &_reshape_weights_output); - weights_to_use = &_reshape_weights_output; - } - - // Convert weights if needed - if (_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout)) - { - // Convert weights - _convert_weights.configure(weights_to_use, &_converted_weights_output, - input->info()->tensor_shape(), fc_info.weights_trained_layout); - - weights_to_use = &_converted_weights_output; - _are_weights_converted = false; - } - - ITensor *tmp_output = (_is_quantized) ? &_gemmlowp_output : output; - if (_is_fc_after_conv) - { - // Fully Connected layer after a Convolution Layer without batches - configure_conv_fc(input, weights_to_use, tmp_output); - } - else - { - // Fully Connected layer after a Fully Connected Layer without batches - configure_fc_fc(input, weights_to_use, tmp_output); - } - - // Configure output stage for asymmetric quantized types - if (_is_quantized) - { - float multiplier = input->info()->quantization_info().uniform().scale * - weights->info()->quantization_info().uniform().scale / - output->info()->quantization_info().uniform().scale; - int output_multiplier; - int output_shift; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, - &output_shift); - _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, - output_shift, - output->info()->quantization_info().uniform().offset); - _gemmlowp_output.allocator()->allocate(); - } - - _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; -} - -Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); - - bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - bool is_fc_after_conv = true; - bool is_quantized = is_data_type_quantized_asymmetric(input->data_type()); - - const ITensorInfo &flatten_input = - TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_flatten_shape(input))); - const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); - const ITensorInfo &converted_weights = - weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) - : TensorInfo(*reshaped_weights.clone()); - const ITensorInfo &gemmlowp_output = TensorInfo( - output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr && !is_quantized) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAccumulateBiasesKernel::validate(output, biases)); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensorInfo *input_to_use = input; - const ITensorInfo *weights_to_use = weights; - const ITensorInfo *tmp_output = (is_quantized) ? &gemmlowp_output : output; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->dimension(1) > 1; - - if (is_batched_fc_layer) - { - is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(), - output->tensor_shape().cbegin() + 1)); - } - else - { - is_fc_after_conv = input->num_dimensions() > 1; - } - - if (!weights_reshaped) - { - // Validate reshape weights kernel - ARM_COMPUTE_RETURN_ON_ERROR( - NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights)); - weights_to_use = &reshaped_weights; - } - - if (is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout)) - { - // Validate convert weights kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate( - weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout)); - weights_to_use = &converted_weights; - } - - if (is_fc_after_conv) - { - // Fully Connected layer after a Convolution Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON( - (weights_to_use->dimension(1) != - (input->dimension(0) * input->dimension(1) * input->dimension(2)))); - - // Validate flatten kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input)); - input_to_use = &flatten_input; - } - else - { - // Fully Connected layer after a Fully Connected Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); - } - // Validate matrix multiply kernel - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*input_to_use, *weights_to_use, *tmp_output)); - - // Validate output stage for asymmetric quantized types - if (is_quantized) - { - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate( - &gemmlowp_output, biases, output)); - } - - return Status{}; -} - -void NEFullyConnectedLayerEx::run() -{ - if (!_is_prepared) - { - if (!_are_weights_reshaped) - _reshape_weights_output.allocator()->allocate(); - if (!_are_weights_converted) - _converted_weights_output.allocator()->allocate(); - _is_prepared = true; - } - - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - // Reshape of the weights - if (!_are_weights_reshaped) - { - _reshape_weights_function.run(); - } - - // Convert weights if needed - if (!_are_weights_converted) - { - _convert_weights.run(); - } - - // Prepare GEMM prepare - if (!_is_quantized) - { - _mm_gemm.prepare(); - } - } - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Linearize input if it comes from a convolutional layer - if (_is_fc_after_conv) - { - NEScheduler::get().schedule(&_flatten_kernel, Window::DimY); - } - - // Run matrix multiply - if (_is_quantized) - { - _mm_gemmlowp.run(); - } - else - { - _mm_gemm.run(); - } - - // Accumulate biases if provided - if (_is_quantized) - { - _gemmlowp_output_stage.run(); - } - else - { - if (_accumulate_biases) - { - NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY); - } - } -} - -void NEFullyConnectedLayerEx::prepare() -{ -#if 0 // TODO Remove this block - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - auto release_unused = [](Tensor *w) { - if (!w->is_used()) - { - w->allocator()->free(); - } - }; - - // Pointer to current weights - const ITensor *cur_weights = _original_weights; - - // Reshape of the weights (happens only once) - if (!_are_weights_reshaped) - { - // Run reshape weights kernel and mark weights as unused - _reshape_weights_output.allocator()->allocate(); - _reshape_weights_function.run(); - - cur_weights->mark_as_unused(); - cur_weights = &_reshape_weights_output; - _are_weights_reshaped = true; - } - - // Convert weights if needed (happens only once) - if (!_are_weights_converted) - { - _converted_weights_output.allocator()->allocate(); - _convert_weights.run(); - - cur_weights->mark_as_unused(); - _are_weights_converted = true; - } - - // Release reshaped weights if unused - release_unused(&_reshape_weights_output); - - // Prepare GEMM prepare and release unused weights - if (!_is_quantized) - { - _mm_gemm.prepare(); - } - - // Release converted weights if unused - release_unused(&_reshape_weights_output); - release_unused(&_converted_weights_output); - - _is_prepared = true; - } -#endif -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp deleted file mode 100644 index dc6c78478..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h" - -#include -#include -#include - -using namespace arm_compute; - -void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input, - const arm_compute::ITensor *weights, - const arm_compute::ITensor *biases, - arm_compute::ITensor *output, bool needs_reshape, - const arm_compute::TensorShape &reshape, - KernelType kernel_type) -{ - _input = input; - _weights = weights; - _biases = biases; - _output = output; - _needs_reshape = needs_reshape; - - const ITensor *input_to_use = input; - if (_needs_reshape) - { - // reshape - auto_init_if_empty(*_neon_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape)); - _neon_reshape.configure(_input, &_neon_buffer); - input_to_use = &_neon_buffer; - } - - _neon_fc = [&]() { - if (kernel_type == KernelType::GENERAL) - { - auto fc = new arm_compute::NEFullyConnectedLayerEx{_memory_manager}; - fc->configure(input_to_use, _weights, _biases, _output); - return std::unique_ptr(fc); - } - else - { - assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS); - - bool is_hybrid = input->info()->data_type() == DataType::F32 && - (weights->info()->data_type() == DataType::S8 || - weights->info()->data_type() == DataType::QASYMM8_SIGNED); - - if (is_hybrid) - { - auto fc = new arm_compute::NEFullyConnectedHybridLayer{_memory_manager}; - ITensorInfo *weights_info = const_cast(_weights->info()); - const auto orgin_weights_data_type = weights_info->data_type(); - weights_info->set_data_type(DataType::QASYMM8_SIGNED); - fc->configure(input_to_use, _weights, _biases, _output); - weights_info->set_data_type(orgin_weights_data_type); - return std::unique_ptr(fc); - } - else - { - auto fc = new arm_compute::NEFullyConnectedLayer{_memory_manager}; - fc->configure(input_to_use, _weights, _biases, _output); - return std::unique_ptr(fc); - } - } - }(); - - // NOTE _neon_buffer is inaccessible from outside, and thus it is safe to invoke allocate here. - if (_needs_reshape) - { - _neon_buffer.allocator()->allocate(); - } -} - -void NEFullyConnectedReshapingLayer::run(void) -{ - if (_needs_reshape) - _neon_reshape.run(); - - _neon_fc->run(); -} - -void NEFullyConnectedReshapingLayer::prepare(void) { _neon_fc->prepare(); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp deleted file mode 100644 index 433c35d58..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEGatherEx.h" - -#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h" -#include "support/MemorySupport.h" - -#include - -namespace arm_compute -{ -void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis) -{ - auto k = support::cpp14::make_unique(); - k->configure(input, indices, output, axis); - _kernel = std::move(k); -} - -Status NEGatherEx::validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis) -{ - return NEGatherKernelEx::validate(input, indices, output, axis); -} - -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp deleted file mode 100644 index 52d58accf..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h" - -#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h" -#include "support/MemorySupport.h" - -using namespace arm_compute; - -void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, - ITensor *output, ITensor *hits) -{ - auto k = support::cpp14::make_unique(); - k->configure(lookups, keys, input, output, hits); - _kernel = std::move(k); -} - -Status NEHashtableLookup::validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits) -{ - return NEHashtableLookupKernel::validate(lookups, keys, input, output, hits); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp deleted file mode 100644 index 16d74e62d..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ -NEInstanceNormalizationLayerEx::NEInstanceNormalizationLayerEx( - std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false), - _permute_input(), _permute_output(), _permuted_input(), _permuted_output() -{ -} - -void NEInstanceNormalizationLayerEx::configure(ITensor *input, ITensor *output, ITensor *gamma, - ITensor *beta, float epsilon) -{ - const DataLayout data_layout = input->info()->data_layout(); - - // Configure Kernels - _is_nchw = data_layout == DataLayout::NCHW; - - if (!_is_nchw) - { - _memory_group.manage(&_permuted_input); - _memory_group.manage(&_permuted_output); - - // Configure the function to transform the input tensor from NHWC -> NCHW - _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U)); - _permuted_input.info()->set_data_layout(DataLayout::NCHW); - - _normalization_kernel.configure(&_permuted_input, &_permuted_output, gamma, beta, epsilon); - _permuted_output.info()->set_data_layout(DataLayout::NCHW); - - _permute_output.configure(&_permuted_output, output != nullptr ? output : input, - PermutationVector(2U, 0U, 1U)); - _permuted_input.allocator()->allocate(); - _permuted_output.allocator()->allocate(); - } - else - { - _normalization_kernel.configure(input, output, gamma, beta, epsilon); - } -} - -Status NEInstanceNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma, const ITensorInfo *beta, - float epsilon) -{ - return NEInstanceNormalizationLayerKernelEx::validate( - &input->clone()->set_data_layout(DataLayout::NCHW), - &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon); -} - -void NEInstanceNormalizationLayerEx::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - // Permute input - if (!_is_nchw) - { - _permute_input.run(); - } - - NEScheduler::get().schedule(&_normalization_kernel, Window::DimZ); - - // Permute output - if (!_is_nchw) - { - _permute_output.run(); - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp deleted file mode 100644 index 275c55024..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEOneHot.h" -#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h" -#include "support/MemorySupport.h" -#include -namespace arm_compute -{ -void NEOneHot::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, - const ITensor *off_value, ITensor *output, int axis) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(indices, depth, on_value, off_value, output, axis); - _kernel = std::move(k); -} -Status NEOneHot::validate(const ITensorInfo *indices, const ITensorInfo *depth, - const ITensorInfo *on_value, const ITensorInfo *off_value, - const ITensorInfo *output, int axis) -{ - return NEOneHotKernel::validate(indices, depth, on_value, off_value, output, axis); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp deleted file mode 100644 index aedb537e9..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/runtime/Tensor.h" - -using namespace arm_compute; - -NEReduceOperation::NEReduceOperation(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), - _reduction_ops(), _keep_dims() -{ -} - -Status NEReduceOperation::validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output, ReduceOperation op) -{ - ARM_COMPUTE_UNUSED(keep_dims); - ARM_COMPUTE_UNUSED(op); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions()); - - TensorShape out_shape = input->tensor_shape(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - const int input_dims = input->num_dimensions(); - Coordinates axis_local = reduction_axis; - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for (unsigned int i = 0; i < reduction_ops; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3); - ARM_COMPUTE_RETURN_ERROR_ON(static_cast(axis_local[i]) > - input->num_dimensions() - 1); - if (output->total_size() > 0 && keep_dims) - { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1); - } - if (keep_dims) - { - out_shape.set(axis_local[i], 1); - } - else - { - out_shape.remove_dimension(axis_local[i] - i); - } - } - const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info); - - return Status{}; -} - -void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _reduction_ops = reduction_axis.num_dimensions(); - _reduction_kernels.resize(_reduction_ops); - _reduced_outs.resize(_reduction_ops - (keep_dims ? 1 : 0)); - _keep_dims = keep_dims; - - Coordinates axis_local = reduction_axis; - const int input_dims = input->info()->num_dimensions(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - // Perform reduction for every axis - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - TensorShape out_shape = - i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape(); - out_shape.set(axis_local[i], 1); - auto in = (i == 0) ? input : (&_reduced_outs[i - 1]); - - if (i == _reduction_ops - 1 && keep_dims) - { - _reduction_kernels[i].configure(in, output, axis_local[i], op); - } - else - { - _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), - input->info()->data_type(), - input->info()->quantization_info())); - _memory_group.manage(&_reduced_outs[i]); - _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i], op); - } - } - - // Allocate intermediate tensors - for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 1 : 0); ++i) - { - _reduced_outs[i].allocator()->allocate(); - } - - // Configure reshape layer if we want to drop the dimensions - if (!keep_dims) - { - TensorShape out_shape = input->info()->tensor_shape(); - - // We have to sort the reduction axis vectors in order for remove_dimension - // to work properly - std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops); - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - out_shape.remove_dimension(axis_local[i] - i); - } - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape)); - _reshape.configure(&_reduced_outs[_reduction_ops - 1], output); - } -} - -void NEReduceOperation::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - _reduction_kernels[i].run(); - } - - if (!_keep_dims) - { - _reshape.run(); - } -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp deleted file mode 100644 index 26a887912..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEReduceSum.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute; - -NEReduceSum::NEReduceSum(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), - _reduction_ops(), _keep_dims() -{ -} - -Status NEReduceSum::validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output) -{ - ARM_COMPUTE_UNUSED(keep_dims); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions()); - - TensorShape out_shape = input->tensor_shape(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - const int input_dims = input->num_dimensions(); - Coordinates axis_local = reduction_axis; - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for (unsigned int i = 0; i < reduction_ops; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3); - ARM_COMPUTE_RETURN_ERROR_ON(static_cast(axis_local[i]) > - input->num_dimensions() - 1); - if (output->total_size() > 0 && keep_dims) - { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1); - } - if (keep_dims) - { - out_shape.set(axis_local[i], 1); - } - else - { - out_shape.remove_dimension(axis_local[i] - i); - } - } - const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info); - - return Status{}; -} - -void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _reduction_ops = reduction_axis.num_dimensions(); - _reduction_kernels.resize(_reduction_ops); - _reduced_outs.resize(_reduction_ops - (keep_dims ? 1 : 0)); - _keep_dims = keep_dims; - - Coordinates axis_local = reduction_axis; - const int input_dims = input->info()->num_dimensions(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - // Perform reduction for every axis - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - TensorShape out_shape = - i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape(); - out_shape.set(axis_local[i], 1); - auto in = (i == 0) ? input : (&_reduced_outs[i - 1]); - - if (i == _reduction_ops - 1 && keep_dims) - { - _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::SUM); - } - else - { - _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), - input->info()->data_type(), - input->info()->quantization_info()) - .set_data_layout(input->info()->data_layout())); - _memory_group.manage(&_reduced_outs[i]); - _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i], - ReductionOperation::SUM); - } - } - - // Allocate intermediate tensors - for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 1 : 0); ++i) - { - _reduced_outs[i].allocator()->allocate(); - } - - // Configure reshape layer if we want to drop the dimensions - if (!keep_dims) - { - TensorShape out_shape = input->info()->tensor_shape(); - - // We have to sort the reduction axis vectors in order for remove_dimension - // to work properly - std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops); - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - out_shape.remove_dimension(axis_local[i] - i); - } - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape)); - _reshape.configure(&_reduced_outs[_reduction_ops - 1], output); - } -} - -void NEReduceSum::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - _reduction_kernels[i].run(); - } - - if (!_keep_dims) - { - _reshape.run(); - } -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp deleted file mode 100644 index 2aa0d2d4b..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ -namespace -{ -/** Define dimension to split the window - * - * @param[in] axis Reduction axis - * - * @return The dimension to split the window - */ -size_t reduction_window_split_dimension(unsigned int axis) -{ - switch (axis) - { - case 0: - return Window::DimY; - case 1: - case 2: - case 3: - return Window::DimX; - default: - ARM_COMPUTE_ERROR("Unsupported reduction axis"); - } -} -} // namespace - -NEReductionOperationEx::NEReductionOperationEx() - : _reduction_kernel(), _fill_border_kernel(), _window_split(0), _reduction_axis() -{ -} - -Status NEReductionOperationEx::validate(const ITensorInfo *input, const ITensorInfo *output, - unsigned int axis, ReduceOperation op) -{ - ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperationKernelEx::validate(input, output, axis, op)); - - return Status{}; -} - -void NEReductionOperationEx::configure(ITensor *input, ITensor *output, unsigned int axis, - ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON( - NEReductionOperationEx::validate(input->info(), output->info(), axis, op)); - - // Configure reduction kernel - _reduction_kernel.configure(input, output, axis, op); - _window_split = reduction_window_split_dimension(axis); - _reduction_axis = axis; - - if (axis == 0) - { - // Configure fill border kernel - const BorderSize fill_border_size = _reduction_kernel.border_size(); - PixelValue pixelValue; - switch (op) - { - case ReduceOperation::MIN: - { - switch (input->info()->data_type()) - { - case DataType::F32: - { - pixelValue = PixelValue(std::numeric_limits::max()); - break; - } - case DataType::F16: - { - pixelValue = PixelValue(static_cast(65504.0f)); - break; - } - case DataType::QASYMM8: - { - pixelValue = - PixelValue(255, input->info()->data_type(), input->info()->quantization_info()); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported DataType"); - } - } - break; - } - case ReduceOperation::MAX: - { - switch (input->info()->data_type()) - { - case DataType::F32: - { - pixelValue = PixelValue(-std::numeric_limits::max()); - break; - } - case DataType::F16: - { - pixelValue = PixelValue(static_cast(-65504.0f)); - break; - } - case DataType::QASYMM8: - { - pixelValue = - PixelValue(0, input->info()->data_type(), input->info()->quantization_info()); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported DataType"); - } - } - break; - } - default: - ARM_COMPUTE_ERROR("Reduction Operation unsupported"); - } - _fill_border_kernel.configure(input, fill_border_size, BorderMode::CONSTANT, pixelValue); - } -} - -void NEReductionOperationEx::run() -{ - if (_reduction_axis == 0) - { - NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); - } - NEScheduler::get().schedule(&_reduction_kernel, _window_split); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp deleted file mode 100644 index aa165cc15..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/UtilsEx.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute::misc::shape_calculator; - -namespace arm_compute -{ - -NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _conv_f(), - _upsample_f(), - _flip_weights(), - _scaled_output(), - _weights_flipped(), - _flip_axis(), - _original_weights(nullptr), - _input(nullptr), - _info(), - _is_prepared(false) -{ -} - -Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, const ITensorInfo *output, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_bottom) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, - DataType::QASYMM8, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); - const unsigned int width_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); - const unsigned int height_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1); - - auto out_dims = transposeconv_output_dimensions( - input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx), - weights->dimension(height_idx), info, invalid_right, invalid_bottom); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - if (bias != nullptr) - { - if (is_data_type_quantized_asymmetric(input->data_type())) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); - } - } - - if (output->tensor_shape().total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), - "Output's width is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), - "Output's height is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), - "Output's depth is invalid."); - } - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, - pad_bottom); - TensorInfo scale_out_info( - input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - const unsigned int batches_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES); - const unsigned int channel_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) != - scale_out_info.dimension(batches_idx)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != - scale_out_info.dimension(channel_idx)); - - ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, - conv_info, WeightsInfo())); - - return Status{}; -} - -void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, - ITensor *output, const PadStrideInfo &info, - unsigned int invalid_right, unsigned int invalid_bottom) -{ - // Perform validation step - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate( - input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); - - const DataLayout data_layout = input->info()->data_layout(); - const unsigned int width_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - auto out_dims = transposeconv_output_dimensions( - input->info()->dimension(width_idx), input->info()->dimension(height_idx), - weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info, - invalid_right, invalid_bottom); - - const TensorShape output_shape = - compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); - - _input = input; - _original_weights = weights; - _info = info; - _is_prepared = false; - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - - // Output auto initialization if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - - _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); - _memory_group.manage(&_scaled_output); - - _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); - - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::FLOOR); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - scale_out_info.set_data_layout(data_layout); - _scaled_output.allocator()->init(scale_out_info); - - _upsample_f.configure(input, &_scaled_output, upsample_info); - - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); - - // Setup flip axis data - _flip_axis.allocator()->allocate(); - auto axis_data = reinterpret_cast(_flip_axis.buffer()); - axis_data[0] = static_cast(width_idx); - axis_data[1] = static_cast(height_idx); - - _scaled_output.allocator()->allocate(); -} - -void NETransposeConvLayer::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - _upsample_f.run(); - _conv_f.run(); -} - -void NETransposeConvLayer::prepare() -{ - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - // Run weights flipping and mark original weights tensor as unused - _weights_flipped.allocator()->allocate(); - _flip_weights.run(); - _original_weights->mark_as_unused(); - - // Prepare convolution - _conv_f.prepare(); - - _is_prepared = true; - } -} -} // namespace arm_compute -- cgit v1.2.3