summaryrefslogtreecommitdiff
path: root/compute/ARMComputeEx/src/runtime/NEON
diff options
context:
space:
mode:
authorChunseok Lee <chunseok.lee@samsung.com>2020-10-29 13:12:50 +0900
committerChunseok Lee <chunseok.lee@samsung.com>2020-10-29 13:12:50 +0900
commitd6b371e095d737922187a518b8faba1ef6f3a2b1 (patch)
tree9d90c09c887b5111389dbedf924f59206411cd5a /compute/ARMComputeEx/src/runtime/NEON
parentc55f8a6db48cda9d3a78048338b7f18c4cca62b8 (diff)
downloadnnfw-d6b371e095d737922187a518b8faba1ef6f3a2b1.tar.gz
nnfw-d6b371e095d737922187a518b8faba1ef6f3a2b1.tar.bz2
nnfw-d6b371e095d737922187a518b8faba1ef6f3a2b1.zip
Imported Upstream version 0.4upstream/0.4
Diffstat (limited to 'compute/ARMComputeEx/src/runtime/NEON')
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp20
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp66
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp86
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp57
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp53
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp300
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp494
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp96
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp63
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp61
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp113
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp59
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp182
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp181
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp173
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp242
16 files changed, 0 insertions, 2246 deletions
diff --git a/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp
deleted file mode 100644
index 80fbf359d..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "arm_compute/runtime/NEON/NEFunctionsEx.h"
-
-// NOTE This empty file aims to validate "NEFunctionsEx.h".
-// DO NOT REMOVE this file.
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
deleted file mode 100644
index 2752eb6aa..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEActivationLayerEx.h"
-
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
-#include "arm_compute/runtime/IRuntimeContext.h"
-#include "support/MemorySupport.h"
-
-namespace arm_compute
-{
-NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT
- : INESimpleFunctionNoBorder(ctx)
-{
-}
-void NEActivationLayerEx::configure(ITensor *input, ITensor *output,
- ActivationLayerInfo activation_info)
-{
- auto k = support::cpp14::make_unique<NEActivationLayerKernelEx>();
- k->configure(input, output, activation_info);
- _kernel = std::move(k);
-}
-
-Status NEActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfo &act_info)
-{
- return NEActivationLayerKernelEx::validate(input, output, act_info);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
deleted file mode 100644
index 2fc94b267..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
-#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
-
-#include "arm_compute/core/ITensor.h"
-#include "support/MemorySupport.h"
-
-#include <utility>
-
-namespace arm_compute
-{
-
-template <BinaryLogicalOperation COP>
-void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2,
- ITensor *output)
-{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
- k->configure(COP, input1, input2, output);
- _kernel = std::move(k);
-}
-
-template <BinaryLogicalOperation COP>
-Status NEBinaryLogicalOperationStatic<COP>::validate(const ITensorInfo *input1,
- const ITensorInfo *input2,
- const ITensorInfo *output)
-{
- return NEBinaryLogicalOperationKernel::validate(COP, input1, input2, output);
-}
-
-void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output,
- BinaryLogicalOperation op)
-{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
- k->configure(op, input1, input2, output);
- _kernel = std::move(k);
-}
-
-Status NEBinaryLogicalOperation::validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, BinaryLogicalOperation op)
-{
- return NEBinaryLogicalOperationKernel::validate(op, input1, input2, output);
-}
-
-// Supported Specializations
-template class NEBinaryLogicalOperationStatic<BinaryLogicalOperation::AND>;
-template class NEBinaryLogicalOperationStatic<BinaryLogicalOperation::OR>;
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp
deleted file mode 100644
index 6ad3e1b12..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NECastBool.h"
-
-#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
-#include "support/MemorySupport.h"
-
-using namespace arm_compute;
-
-void NECastBool::configure(const ITensor *input, ITensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<NECastBoolKernel>();
- k->configure(input, output);
- _kernel = std::move(k);
-}
-
-Status NECastBool::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
- return NECastBoolKernel::validate(input, output);
-}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
deleted file mode 100644
index e0ab3e025..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
-
-#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
-#include "support/MemorySupport.h"
-
-using namespace arm_compute;
-
-void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups)
-{
- auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>();
- k->configure(input, output, lookups);
- _kernel = std::move(k);
-}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
deleted file mode 100644
index a123439d9..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-#include <algorithm>
-#include <cmath>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-namespace
-{
-Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
-
- return Status{};
-}
-} // namespace
-
-void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
-{
- auto k = support::cpp14::make_unique<NETransposeKernel>();
- k->configure(input, output);
- _kernel = std::move(k);
-}
-
-Status NEFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *input,
- const ITensorInfo *output)
-{
- return NETransposeKernel::validate(input, output);
-}
-
-NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
- _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
- _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
- _accumulate_biases(false), _is_prepared(false)
-{
-}
-
-void NEFullyConnectedHybridLayer::configure_mm(const ITensor *input, const ITensor *weights,
- ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
-
- // Configure gemmlowp function
- _mm_gemmlowp.configure(input, weights, nullptr, output);
-}
-
-void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor *weights,
- const ITensor *biases, ITensor *output,
- FullyConnectedLayerInfo fc_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
- // Perform validate step
- ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
-
- _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
- _accumulate_biases = false;
- _original_weights = weights;
-
- // Configure accumulate biases kernel for non quantized asymmetric types
- if (biases != nullptr)
- {
- _accumulate_biases = true;
-
- // Configure accumulate biases kernel
- _accumulate_biases_kernel.configure(output, biases);
- }
-
- // With the Fully Connected layer we can have 4 different cases:
- // 1) Convolution layer -> Fully Connected layer without batches
- // 2) Fully Connected layer -> Fully Connected layer without batches
- // 3) Convolution layer -> Fully Connected layer with batches
- // 4) Fully Connected layer -> Fully Connected layer with batches
-
- const ITensor *weights_to_use = weights;
-
- // Check if we have a fully connected layer with batches
- const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
- bool _is_fc_after_conv;
- if (is_batched_fc_layer)
- {
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
- }
- else
- {
- _is_fc_after_conv = input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1;
- }
- ARM_COMPUTE_ERROR_ON_MSG(_is_fc_after_conv,
- "NEFullyConnectedHybridLayer does not support after conv");
- (void)_is_fc_after_conv;
-
- // Reshape weights if needed
- if (!_are_weights_reshaped)
- {
- // Reshape the weights
- _reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
- _reshape_weights_function.configure(weights_to_use, &_reshape_weights_output);
- weights_to_use = &_reshape_weights_output;
- }
-
- // Quantize input
- _quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
- _scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
- _quant_input_kernel.configure(input, &_quantized_input, &_scale_factor);
-
- // GEMM
- _gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
- configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output);
-
- // Multiply scale
- _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
- weights->info()->quantization_info().uniform().scale);
-
- _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
-
- _quantized_input.allocator()->allocate();
- _scale_factor.allocator()->allocate();
- _gemmlowp_output.allocator()->allocate();
-}
-
-Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *biases, const ITensorInfo *output,
- FullyConnectedLayerInfo fc_info)
-{
- ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
- ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2);
-
- bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
-
- const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
-
- // Configure accumulate biases kernel for non quantized asymmetric types
- if (biases != nullptr)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAccumulateBiasesKernel::validate(output, biases));
- }
-
- // With the Fully Connected layer we can have 4 different cases:
- // 1) Convolution layer -> Fully Connected layer without batches
- // 2) Fully Connected layer -> Fully Connected layer without batches
- // 3) Convolution layer -> Fully Connected layer with batches
- // 4) Fully Connected layer -> Fully Connected layer with batches
-
- const ITensorInfo *weights_to_use = weights;
-
- if (!weights_reshaped)
- {
- // Validate reshape weights kernel
- ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
- weights_to_use = &reshaped_weights;
- }
-
- // Fully Connected layer after a Fully Connected Layer without batches
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
-
- // Validate quantization kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
- const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32);
- ARM_COMPUTE_RETURN_ON_ERROR(
- NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
-
- const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
- // Validate matrix multiply kernel
- ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
- &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
-
- return Status{};
-}
-
-void NEFullyConnectedHybridLayer::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Quantize input
- NEScheduler::get().schedule(&_quant_input_kernel, Window::DimY);
-
- // Run matrix multiply
- _mm_gemmlowp.run();
-
- // Multiply scale factor
- NEScheduler::get().schedule(&_multiply_scale_kernel, Window::DimY);
-
- // Accumulate biases if provided
- if (_accumulate_biases)
- {
- NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY);
- }
-}
-
-void NEFullyConnectedHybridLayer::prepare()
-{
- if (!_is_prepared)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- auto release_unused = [](Tensor *w) {
- if (!w->is_used())
- {
- w->allocator()->free();
- }
- };
-
- // Reshape of the weights (happens only once)
- if (!_are_weights_reshaped)
- {
- // Run reshape weights kernel and mark weights as unused
- _reshape_weights_output.allocator()->allocate();
- _reshape_weights_function.run();
-
- _are_weights_reshaped = true;
- // We can not release _original_weights because it can be used in other nodes
- }
-
- // Prepare GEMM prepare and release unused weights
- _mm_gemmlowp.prepare();
-
- // Release reshaped weights if unused
- release_unused(&_reshape_weights_output);
-
- _is_prepared = true;
- }
-}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
deleted file mode 100644
index cb7557a5a..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-#include <algorithm>
-#include <cmath>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-namespace
-{
-Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
-{
- if (is_data_type_quantized_asymmetric(input.data_type()))
- {
- // Since we need negative offsets for computing convolution, we need to change
- // QuantizationInfo()
- // Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale,
- -input.quantization_info().uniform().offset);
- const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale,
- -weights.quantization_info().uniform().offset);
-
- // Validate gemmlowp function
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
- }
- else
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(
- &input, &weights, nullptr, &output, 1.f, 0.0f,
- GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
- }
-
- return Status{};
-}
-} // namespace
-
-NEFullyConnectedLayerEx::NEFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
- _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
- _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(),
- _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr),
- _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false),
- _accumulate_biases(false), _is_quantized(false), _is_prepared(false)
-{
-}
-
-void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *weights,
- ITensor *output)
-{
- if (_is_quantized)
- {
- // Since we need negative offsets for computing convolution, we need to change
- // QuantizationInfo()
- // Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info = input->info()->quantization_info();
- const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
-
- input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
- weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
-
- // Configure gemmlowp function
- _mm_gemmlowp.configure(input, weights, nullptr, output);
-
- // Revert back QuantizatioInfo as input and weights could be used in other fully connected
- // layers
- input->info()->set_quantization_info(input_quantization_info);
- weights->info()->set_quantization_info(weights_quantization_info);
- }
- else
- {
- // Configure matrix multiply kernel
- _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f,
- GEMMInfo(false, false, false /* Reshape weights only for the first run */));
- }
-}
-
-void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITensor *weights,
- ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
-
- // If the fully connected layer is called after a convolution layer, the input tensor must be
- // linearized
-
- // Initialize output tensor for flatten
- TensorShape shape_flatten = compute_flatten_shape(input->info());
- _flatten_output.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- shape_flatten));
-
- // Configure flatten kernel
- _memory_group.manage(&_flatten_output);
- _flatten_kernel.configure(input, &_flatten_output);
-
- // Configure matrix multiply kernel
- configure_mm(&_flatten_output, weights, output);
-
- // Allocate the output tensor for flatten once all the configure methods have been called
- _flatten_output.allocator()->allocate();
-}
-
-void NEFullyConnectedLayerEx::configure_fc_fc(const ITensor *input, const ITensor *weights,
- ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
-
- // Configure matrix multiply kernel
- configure_mm(input, weights, output);
-}
-
-void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *weights,
- const ITensor *biases, ITensor *output,
- FullyConnectedLayerInfo fc_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
- // Perform validate step
- ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
-
- _are_weights_converted = true;
- _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
- _is_fc_after_conv = true;
- _accumulate_biases = false;
- _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
- _original_weights = weights;
-
- // Configure gemmlowp output
- if (_is_quantized)
- {
- _gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::S32));
- }
-
- // Configure accumulate biases kernel for non quantized asymmetric types
- if (biases != nullptr && !_is_quantized)
- {
- _accumulate_biases = true;
-
- // Configure accumulate biases kernel
- _accumulate_biases_kernel.configure(output, biases);
- }
-
- // With the Fully Connected layer we can have 4 different cases:
- // 1) Convolution layer -> Fully Connected layer without batches
- // 2) Fully Connected layer -> Fully Connected layer without batches
- // 3) Convolution layer -> Fully Connected layer with batches
- // 4) Fully Connected layer -> Fully Connected layer with batches
-
- const ITensor *weights_to_use = weights;
-
- // Check if we have a fully connected layer with batches
- const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
- if (is_batched_fc_layer)
- {
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
- }
- else
- {
- _is_fc_after_conv = input->info()->num_dimensions() > 1;
- }
-
- // Reshape weights if needed
- if (!_are_weights_reshaped)
- {
- // Reshape the weights
- _reshape_weights_function.configure(weights, &_reshape_weights_output);
- weights_to_use = &_reshape_weights_output;
- }
-
- // Convert weights if needed
- if (_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
- {
- // Convert weights
- _convert_weights.configure(weights_to_use, &_converted_weights_output,
- input->info()->tensor_shape(), fc_info.weights_trained_layout);
-
- weights_to_use = &_converted_weights_output;
- _are_weights_converted = false;
- }
-
- ITensor *tmp_output = (_is_quantized) ? &_gemmlowp_output : output;
- if (_is_fc_after_conv)
- {
- // Fully Connected layer after a Convolution Layer without batches
- configure_conv_fc(input, weights_to_use, tmp_output);
- }
- else
- {
- // Fully Connected layer after a Fully Connected Layer without batches
- configure_fc_fc(input, weights_to_use, tmp_output);
- }
-
- // Configure output stage for asymmetric quantized types
- if (_is_quantized)
- {
- float multiplier = input->info()->quantization_info().uniform().scale *
- weights->info()->quantization_info().uniform().scale /
- output->info()->quantization_info().uniform().scale;
- int output_multiplier;
- int output_shift;
- quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier,
- &output_shift);
- _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier,
- output_shift,
- output->info()->quantization_info().uniform().offset);
- _gemmlowp_output.allocator()->allocate();
- }
-
- _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
-}
-
-Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *biases, const ITensorInfo *output,
- FullyConnectedLayerInfo fc_info)
-{
- ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
-
- bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
- bool is_fc_after_conv = true;
- bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
-
- const ITensorInfo &flatten_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_flatten_shape(input)));
- const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
- const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
- const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
-
- // Configure accumulate biases kernel for non quantized asymmetric types
- if (biases != nullptr && !is_quantized)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAccumulateBiasesKernel::validate(output, biases));
- }
-
- // With the Fully Connected layer we can have 4 different cases:
- // 1) Convolution layer -> Fully Connected layer without batches
- // 2) Fully Connected layer -> Fully Connected layer without batches
- // 3) Convolution layer -> Fully Connected layer with batches
- // 4) Fully Connected layer -> Fully Connected layer with batches
-
- const ITensorInfo *input_to_use = input;
- const ITensorInfo *weights_to_use = weights;
- const ITensorInfo *tmp_output = (is_quantized) ? &gemmlowp_output : output;
-
- // Check if we have a fully connected layer with batches
- const bool is_batched_fc_layer = output->dimension(1) > 1;
-
- if (is_batched_fc_layer)
- {
- is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(),
- output->tensor_shape().cbegin() + 1));
- }
- else
- {
- is_fc_after_conv = input->num_dimensions() > 1;
- }
-
- if (!weights_reshaped)
- {
- // Validate reshape weights kernel
- ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
- weights_to_use = &reshaped_weights;
- }
-
- if (is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
- {
- // Validate convert weights kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
- weights_to_use = &converted_weights;
- }
-
- if (is_fc_after_conv)
- {
- // Fully Connected layer after a Convolution Layer without batches
- ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
-
- // Validate flatten kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
- input_to_use = &flatten_input;
- }
- else
- {
- // Fully Connected layer after a Fully Connected Layer without batches
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
- }
- // Validate matrix multiply kernel
- ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*input_to_use, *weights_to_use, *tmp_output));
-
- // Validate output stage for asymmetric quantized types
- if (is_quantized)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(
- &gemmlowp_output, biases, output));
- }
-
- return Status{};
-}
-
-void NEFullyConnectedLayerEx::run()
-{
- if (!_is_prepared)
- {
- if (!_are_weights_reshaped)
- _reshape_weights_output.allocator()->allocate();
- if (!_are_weights_converted)
- _converted_weights_output.allocator()->allocate();
- _is_prepared = true;
- }
-
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- // Reshape of the weights
- if (!_are_weights_reshaped)
- {
- _reshape_weights_function.run();
- }
-
- // Convert weights if needed
- if (!_are_weights_converted)
- {
- _convert_weights.run();
- }
-
- // Prepare GEMM prepare
- if (!_is_quantized)
- {
- _mm_gemm.prepare();
- }
- }
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Linearize input if it comes from a convolutional layer
- if (_is_fc_after_conv)
- {
- NEScheduler::get().schedule(&_flatten_kernel, Window::DimY);
- }
-
- // Run matrix multiply
- if (_is_quantized)
- {
- _mm_gemmlowp.run();
- }
- else
- {
- _mm_gemm.run();
- }
-
- // Accumulate biases if provided
- if (_is_quantized)
- {
- _gemmlowp_output_stage.run();
- }
- else
- {
- if (_accumulate_biases)
- {
- NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY);
- }
- }
-}
-
-void NEFullyConnectedLayerEx::prepare()
-{
-#if 0 // TODO Remove this block
- if (!_is_prepared)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- auto release_unused = [](Tensor *w) {
- if (!w->is_used())
- {
- w->allocator()->free();
- }
- };
-
- // Pointer to current weights
- const ITensor *cur_weights = _original_weights;
-
- // Reshape of the weights (happens only once)
- if (!_are_weights_reshaped)
- {
- // Run reshape weights kernel and mark weights as unused
- _reshape_weights_output.allocator()->allocate();
- _reshape_weights_function.run();
-
- cur_weights->mark_as_unused();
- cur_weights = &_reshape_weights_output;
- _are_weights_reshaped = true;
- }
-
- // Convert weights if needed (happens only once)
- if (!_are_weights_converted)
- {
- _converted_weights_output.allocator()->allocate();
- _convert_weights.run();
-
- cur_weights->mark_as_unused();
- _are_weights_converted = true;
- }
-
- // Release reshaped weights if unused
- release_unused(&_reshape_weights_output);
-
- // Prepare GEMM prepare and release unused weights
- if (!_is_quantized)
- {
- _mm_gemm.prepare();
- }
-
- // Release converted weights if unused
- release_unused(&_reshape_weights_output);
- release_unused(&_converted_weights_output);
-
- _is_prepared = true;
- }
-#endif
-}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
deleted file mode 100644
index dc6c78478..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h"
-
-#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h>
-#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h>
-
-using namespace arm_compute;
-
-void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input,
- const arm_compute::ITensor *weights,
- const arm_compute::ITensor *biases,
- arm_compute::ITensor *output, bool needs_reshape,
- const arm_compute::TensorShape &reshape,
- KernelType kernel_type)
-{
- _input = input;
- _weights = weights;
- _biases = biases;
- _output = output;
- _needs_reshape = needs_reshape;
-
- const ITensor *input_to_use = input;
- if (_needs_reshape)
- {
- // reshape
- auto_init_if_empty(*_neon_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape));
- _neon_reshape.configure(_input, &_neon_buffer);
- input_to_use = &_neon_buffer;
- }
-
- _neon_fc = [&]() {
- if (kernel_type == KernelType::GENERAL)
- {
- auto fc = new arm_compute::NEFullyConnectedLayerEx{_memory_manager};
- fc->configure(input_to_use, _weights, _biases, _output);
- return std::unique_ptr<arm_compute::IFunction>(fc);
- }
- else
- {
- assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
-
- bool is_hybrid = input->info()->data_type() == DataType::F32 &&
- (weights->info()->data_type() == DataType::S8 ||
- weights->info()->data_type() == DataType::QASYMM8_SIGNED);
-
- if (is_hybrid)
- {
- auto fc = new arm_compute::NEFullyConnectedHybridLayer{_memory_manager};
- ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info());
- const auto orgin_weights_data_type = weights_info->data_type();
- weights_info->set_data_type(DataType::QASYMM8_SIGNED);
- fc->configure(input_to_use, _weights, _biases, _output);
- weights_info->set_data_type(orgin_weights_data_type);
- return std::unique_ptr<arm_compute::IFunction>(fc);
- }
- else
- {
- auto fc = new arm_compute::NEFullyConnectedLayer{_memory_manager};
- fc->configure(input_to_use, _weights, _biases, _output);
- return std::unique_ptr<arm_compute::IFunction>(fc);
- }
- }
- }();
-
- // NOTE _neon_buffer is inaccessible from outside, and thus it is safe to invoke allocate here.
- if (_needs_reshape)
- {
- _neon_buffer.allocator()->allocate();
- }
-}
-
-void NEFullyConnectedReshapingLayer::run(void)
-{
- if (_needs_reshape)
- _neon_reshape.run();
-
- _neon_fc->run();
-}
-
-void NEFullyConnectedReshapingLayer::prepare(void) { _neon_fc->prepare(); }
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
deleted file mode 100644
index 433c35d58..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
-
-#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "support/MemorySupport.h"
-
-#include <utility>
-
-namespace arm_compute
-{
-void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis)
-{
- auto k = support::cpp14::make_unique<NEGatherKernelEx>();
- k->configure(input, indices, output, axis);
- _kernel = std::move(k);
-}
-
-Status NEGatherEx::validate(const ITensorInfo *input, const ITensorInfo *indices,
- const ITensorInfo *output, int axis)
-{
- return NEGatherKernelEx::validate(input, indices, output, axis);
-}
-
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
deleted file mode 100644
index 52d58accf..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
-
-#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
-#include "support/MemorySupport.h"
-
-using namespace arm_compute;
-
-void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input,
- ITensor *output, ITensor *hits)
-{
- auto k = support::cpp14::make_unique<NEHashtableLookupKernel>();
- k->configure(lookups, keys, input, output, hits);
- _kernel = std::move(k);
-}
-
-Status NEHashtableLookup::validate(const ITensorInfo *lookups, const ITensorInfo *keys,
- const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *hits)
-{
- return NEHashtableLookupKernel::validate(lookups, keys, input, output, hits);
-}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
deleted file mode 100644
index 16d74e62d..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-NEInstanceNormalizationLayerEx::NEInstanceNormalizationLayerEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
- _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
-{
-}
-
-void NEInstanceNormalizationLayerEx::configure(ITensor *input, ITensor *output, ITensor *gamma,
- ITensor *beta, float epsilon)
-{
- const DataLayout data_layout = input->info()->data_layout();
-
- // Configure Kernels
- _is_nchw = data_layout == DataLayout::NCHW;
-
- if (!_is_nchw)
- {
- _memory_group.manage(&_permuted_input);
- _memory_group.manage(&_permuted_output);
-
- // Configure the function to transform the input tensor from NHWC -> NCHW
- _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
- _permuted_input.info()->set_data_layout(DataLayout::NCHW);
-
- _normalization_kernel.configure(&_permuted_input, &_permuted_output, gamma, beta, epsilon);
- _permuted_output.info()->set_data_layout(DataLayout::NCHW);
-
- _permute_output.configure(&_permuted_output, output != nullptr ? output : input,
- PermutationVector(2U, 0U, 1U));
- _permuted_input.allocator()->allocate();
- _permuted_output.allocator()->allocate();
- }
- else
- {
- _normalization_kernel.configure(input, output, gamma, beta, epsilon);
- }
-}
-
-Status NEInstanceNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *gamma, const ITensorInfo *beta,
- float epsilon)
-{
- return NEInstanceNormalizationLayerKernelEx::validate(
- &input->clone()->set_data_layout(DataLayout::NCHW),
- &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
-}
-
-void NEInstanceNormalizationLayerEx::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Permute input
- if (!_is_nchw)
- {
- _permute_input.run();
- }
-
- NEScheduler::get().schedule(&_normalization_kernel, Window::DimZ);
-
- // Permute output
- if (!_is_nchw)
- {
- _permute_output.run();
- }
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp
deleted file mode 100644
index 275c55024..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEOneHot.h"
-#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h"
-#include "support/MemorySupport.h"
-#include <utility>
-namespace arm_compute
-{
-void NEOneHot::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
- const ITensor *off_value, ITensor *output, int axis)
-{
- auto k = arm_compute::support::cpp14::make_unique<NEOneHotKernel>();
- k->configure(indices, depth, on_value, off_value, output, axis);
- _kernel = std::move(k);
-}
-Status NEOneHot::validate(const ITensorInfo *indices, const ITensorInfo *depth,
- const ITensorInfo *on_value, const ITensorInfo *off_value,
- const ITensorInfo *output, int axis)
-{
- return NEOneHotKernel::validate(indices, depth, on_value, off_value, output, axis);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
deleted file mode 100644
index aedb537e9..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/Tensor.h"
-
-using namespace arm_compute;
-
-NEReduceOperation::NEReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
-{
-}
-
-Status NEReduceOperation::validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output, ReduceOperation op)
-{
- ARM_COMPUTE_UNUSED(keep_dims);
- ARM_COMPUTE_UNUSED(op);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions());
-
- TensorShape out_shape = input->tensor_shape();
- const unsigned int reduction_ops = reduction_axis.num_dimensions();
- const int input_dims = input->num_dimensions();
- Coordinates axis_local = reduction_axis;
-
- // Convert negative axis
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- axis_local[i] = wrap_around(axis_local[i], input_dims);
- }
-
- std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3);
- ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local[i]) >
- input->num_dimensions() - 1);
- if (output->total_size() > 0 && keep_dims)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1);
- }
- if (keep_dims)
- {
- out_shape.set(axis_local[i], 1);
- }
- else
- {
- out_shape.remove_dimension(axis_local[i] - i);
- }
- }
- const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
- return Status{};
-}
-
-void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output, ReduceOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- _reduction_ops = reduction_axis.num_dimensions();
- _reduction_kernels.resize(_reduction_ops);
- _reduced_outs.resize(_reduction_ops - (keep_dims ? 1 : 0));
- _keep_dims = keep_dims;
-
- Coordinates axis_local = reduction_axis;
- const int input_dims = input->info()->num_dimensions();
- const unsigned int reduction_ops = reduction_axis.num_dimensions();
-
- // Convert negative axis
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- axis_local[i] = wrap_around(axis_local[i], input_dims);
- }
-
- // Perform reduction for every axis
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
- out_shape.set(axis_local[i], 1);
- auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
-
- if (i == _reduction_ops - 1 && keep_dims)
- {
- _reduction_kernels[i].configure(in, output, axis_local[i], op);
- }
- else
- {
- _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(),
- input->info()->data_type(),
- input->info()->quantization_info()));
- _memory_group.manage(&_reduced_outs[i]);
- _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i], op);
- }
- }
-
- // Allocate intermediate tensors
- for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 1 : 0); ++i)
- {
- _reduced_outs[i].allocator()->allocate();
- }
-
- // Configure reshape layer if we want to drop the dimensions
- if (!keep_dims)
- {
- TensorShape out_shape = input->info()->tensor_shape();
-
- // We have to sort the reduction axis vectors in order for remove_dimension
- // to work properly
- std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops);
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- out_shape.remove_dimension(axis_local[i] - i);
- }
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape));
- _reshape.configure(&_reduced_outs[_reduction_ops - 1], output);
- }
-}
-
-void NEReduceOperation::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- _reduction_kernels[i].run();
- }
-
- if (!_keep_dims)
- {
- _reshape.run();
- }
-}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
deleted file mode 100644
index 26a887912..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEReduceSum.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-using namespace arm_compute;
-
-NEReduceSum::NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
-{
-}
-
-Status NEReduceSum::validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output)
-{
- ARM_COMPUTE_UNUSED(keep_dims);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions());
-
- TensorShape out_shape = input->tensor_shape();
- const unsigned int reduction_ops = reduction_axis.num_dimensions();
- const int input_dims = input->num_dimensions();
- Coordinates axis_local = reduction_axis;
-
- // Convert negative axis
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- axis_local[i] = wrap_around(axis_local[i], input_dims);
- }
-
- std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3);
- ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local[i]) >
- input->num_dimensions() - 1);
- if (output->total_size() > 0 && keep_dims)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1);
- }
- if (keep_dims)
- {
- out_shape.set(axis_local[i], 1);
- }
- else
- {
- out_shape.remove_dimension(axis_local[i] - i);
- }
- }
- const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
- return Status{};
-}
-
-void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- _reduction_ops = reduction_axis.num_dimensions();
- _reduction_kernels.resize(_reduction_ops);
- _reduced_outs.resize(_reduction_ops - (keep_dims ? 1 : 0));
- _keep_dims = keep_dims;
-
- Coordinates axis_local = reduction_axis;
- const int input_dims = input->info()->num_dimensions();
- const unsigned int reduction_ops = reduction_axis.num_dimensions();
-
- // Convert negative axis
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- axis_local[i] = wrap_around(axis_local[i], input_dims);
- }
-
- // Perform reduction for every axis
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
- out_shape.set(axis_local[i], 1);
- auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
-
- if (i == _reduction_ops - 1 && keep_dims)
- {
- _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::SUM);
- }
- else
- {
- _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(),
- input->info()->data_type(),
- input->info()->quantization_info())
- .set_data_layout(input->info()->data_layout()));
- _memory_group.manage(&_reduced_outs[i]);
- _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i],
- ReductionOperation::SUM);
- }
- }
-
- // Allocate intermediate tensors
- for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 1 : 0); ++i)
- {
- _reduced_outs[i].allocator()->allocate();
- }
-
- // Configure reshape layer if we want to drop the dimensions
- if (!keep_dims)
- {
- TensorShape out_shape = input->info()->tensor_shape();
-
- // We have to sort the reduction axis vectors in order for remove_dimension
- // to work properly
- std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops);
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- out_shape.remove_dimension(axis_local[i] - i);
- }
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape));
- _reshape.configure(&_reduced_outs[_reduction_ops - 1], output);
- }
-}
-
-void NEReduceSum::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- _reduction_kernels[i].run();
- }
-
- if (!_keep_dims)
- {
- _reshape.run();
- }
-}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
deleted file mode 100644
index 2aa0d2d4b..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-namespace
-{
-/** Define dimension to split the window
- *
- * @param[in] axis Reduction axis
- *
- * @return The dimension to split the window
- */
-size_t reduction_window_split_dimension(unsigned int axis)
-{
- switch (axis)
- {
- case 0:
- return Window::DimY;
- case 1:
- case 2:
- case 3:
- return Window::DimX;
- default:
- ARM_COMPUTE_ERROR("Unsupported reduction axis");
- }
-}
-} // namespace
-
-NEReductionOperationEx::NEReductionOperationEx()
- : _reduction_kernel(), _fill_border_kernel(), _window_split(0), _reduction_axis()
-{
-}
-
-Status NEReductionOperationEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- unsigned int axis, ReduceOperation op)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperationKernelEx::validate(input, output, axis, op));
-
- return Status{};
-}
-
-void NEReductionOperationEx::configure(ITensor *input, ITensor *output, unsigned int axis,
- ReduceOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(
- NEReductionOperationEx::validate(input->info(), output->info(), axis, op));
-
- // Configure reduction kernel
- _reduction_kernel.configure(input, output, axis, op);
- _window_split = reduction_window_split_dimension(axis);
- _reduction_axis = axis;
-
- if (axis == 0)
- {
- // Configure fill border kernel
- const BorderSize fill_border_size = _reduction_kernel.border_size();
- PixelValue pixelValue;
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- switch (input->info()->data_type())
- {
- case DataType::F32:
- {
- pixelValue = PixelValue(std::numeric_limits<float>::max());
- break;
- }
- case DataType::F16:
- {
- pixelValue = PixelValue(static_cast<half>(65504.0f));
- break;
- }
- case DataType::QASYMM8:
- {
- pixelValue =
- PixelValue(255, input->info()->data_type(), input->info()->quantization_info());
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported DataType");
- }
- }
- break;
- }
- case ReduceOperation::MAX:
- {
- switch (input->info()->data_type())
- {
- case DataType::F32:
- {
- pixelValue = PixelValue(-std::numeric_limits<float>::max());
- break;
- }
- case DataType::F16:
- {
- pixelValue = PixelValue(static_cast<half>(-65504.0f));
- break;
- }
- case DataType::QASYMM8:
- {
- pixelValue =
- PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported DataType");
- }
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Reduction Operation unsupported");
- }
- _fill_border_kernel.configure(input, fill_border_size, BorderMode::CONSTANT, pixelValue);
- }
-}
-
-void NEReductionOperationEx::run()
-{
- if (_reduction_axis == 0)
- {
- NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY);
- }
- NEScheduler::get().schedule(&_reduction_kernel, _window_split);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
deleted file mode 100644
index aa165cc15..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/UtilsEx.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-
-NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _conv_f(),
- _upsample_f(),
- _flip_weights(),
- _scaled_output(),
- _weights_flipped(),
- _flip_axis(),
- _original_weights(nullptr),
- _input(nullptr),
- _info(),
- _is_prepared(false)
-{
-}
-
-Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *bias, const ITensorInfo *output,
- const PadStrideInfo &info, unsigned int invalid_right,
- unsigned int invalid_bottom)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16,
- DataType::QASYMM8, DataType::QASYMM8_SIGNED);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
- const unsigned int width_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
- const unsigned int height_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1);
-
- auto out_dims = transposeconv_output_dimensions(
- input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
- weights->dimension(height_idx), info, invalid_right, invalid_bottom);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- if (bias != nullptr)
- {
- if (is_data_type_quantized_asymmetric(input->data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
- }
- }
-
- if (output->tensor_shape().total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(),
- "Output's width is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(),
- "Output's height is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(),
- "Output's depth is invalid.");
- }
-
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
- TensorInfo scale_out_info(
- input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
- const unsigned int batches_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
- const unsigned int channel_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) !=
- scale_out_info.dimension(batches_idx));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) !=
- scale_out_info.dimension(channel_idx));
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, WeightsInfo()));
-
- return Status{};
-}
-
-void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias,
- ITensor *output, const PadStrideInfo &info,
- unsigned int invalid_right, unsigned int invalid_bottom)
-{
- // Perform validation step
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
- input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
-
- const DataLayout data_layout = input->info()->data_layout();
- const unsigned int width_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(width_idx), input->info()->dimension(height_idx),
- weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
- invalid_right, invalid_bottom);
-
- const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
-
- _input = input;
- _original_weights = weights;
- _info = info;
- _is_prepared = false;
-
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(),
- input->info()->quantization_info());
-
- _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
- _memory_group.manage(&_scaled_output);
-
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
-
- const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
- DimensionRoundingType::FLOOR);
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
- input->info()->quantization_info());
- scale_out_info.set_data_layout(data_layout);
- _scaled_output.allocator()->init(scale_out_info);
-
- _upsample_f.configure(input, &_scaled_output, upsample_info);
-
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
-
- // Setup flip axis data
- _flip_axis.allocator()->allocate();
- auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
- axis_data[0] = static_cast<uint32_t>(width_idx);
- axis_data[1] = static_cast<uint32_t>(height_idx);
-
- _scaled_output.allocator()->allocate();
-}
-
-void NETransposeConvLayer::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- _upsample_f.run();
- _conv_f.run();
-}
-
-void NETransposeConvLayer::prepare()
-{
- if (!_is_prepared)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- // Run weights flipping and mark original weights tensor as unused
- _weights_flipped.allocator()->allocate();
- _flip_weights.run();
- _original_weights->mark_as_unused();
-
- // Prepare convolution
- _conv_f.prepare();
-
- _is_prepared = true;
- }
-}
-} // namespace arm_compute