Diffstat (limited to 'compute/ARMComputeEx/src/runtime')
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp | 224
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp | 5
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp (renamed from compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp) | 14
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp | 60
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp | 3
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp | 62
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp | 92
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp | 6
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp | 171
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp | 4
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp | 2
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp | 2
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp | 2
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp | 75
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp | 110
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp | 21
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp | 197
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp | 16
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp | 18
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp | 7
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp | 56
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp | 3
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp | 55
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp | 87
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp | 4
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp | 3
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp | 3
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp | 10
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp (renamed from compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp) | 31
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp | 18
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp | 14
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp | 173
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp | 59
33 files changed, 1126 insertions, 481 deletions
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp
new file mode 100644
index 000000000..6b9b0d4b4
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/runtime/Utils.h"
+
+namespace arm_compute
+{
+CLArgMinMaxLayerEx::CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(),
+ _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis()
+{
+}
+
+Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const ITensorInfo *output,
+ const ReductionOperation &op)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX &&
+ op != ReductionOperation::ARG_IDX_MIN,
+ "Invalid reduction operation");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast<int>(TensorShape::num_max_dimensions),
+ "Reduction axis greater than max number of dimensions");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
+ const unsigned int num_of_stages =
+ utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
+
+ DataType output_data_type = DataType::S32;
+ TensorInfo not_reshaped_output;
+ const auto input_num_channels = input->num_channels();
+ const auto input_qinfo = input->quantization_info();
+
+ if (output->total_size() != 0)
+ {
+ output_data_type = output->data_type();
+ const TensorInfo expected_output_shape =
+ output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape(
+ input->tensor_shape(), axis, false));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output);
+ }
+
+ auto shape_before_reshape = input->tensor_shape();
+ shape_before_reshape.set(axis, 1);
+ auto initialize_tensorinfo = [](TensorInfo &ti, TensorShape shape, DataType data_type,
+ int num_channels, QuantizationInfo qinfo) {
+ ti.set_data_type(data_type)
+ .set_tensor_shape(shape)
+ .set_num_channels(num_channels)
+ .set_quantization_info(qinfo);
+ };
+
+ initialize_tensorinfo(not_reshaped_output, shape_before_reshape, output_data_type,
+ input_num_channels, input_qinfo);
+
+ if (num_of_stages == 1)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxLayerKernelEx::validate(input, nullptr, &not_reshaped_output, axis, op));
+ }
+ else
+ {
+ // Create temporary tensor infos
+ std::vector<TensorInfo> sums_vector(num_of_stages - 1);
+
+ // Create intermediate tensor info
+ TensorShape shape{input->tensor_shape()};
+
+ for (unsigned int i = 0; i < num_of_stages - 1; i++)
+ {
+ shape.set(0, ceil(shape.x() / 128.f));
+ sums_vector[i].set_data_type(input->data_type());
+ sums_vector[i].set_tensor_shape(shape);
+ sums_vector[i].set_num_channels(input->num_channels());
+ }
+
+ // Validate ReductionOperation only on first kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op));
+
+ // Validate ReductionOperation on intermediate stages
+ for (unsigned int i = 1; i < num_of_stages - 1; ++i)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1], &sums_vector[i], axis, op));
+ }
+
+ // Validate ReductionOperation on the last stage
+ const unsigned int last_stage = num_of_stages - 1;
+ ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(
+ input, &sums_vector[last_stage - 1], &not_reshaped_output, axis, op));
+ }
+ ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayer::validate(&not_reshaped_output, output));
+ return Status{};
+}
+
+void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor *output,
+ const ReductionOperation &op)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
+ _reduction_axis = axis;
+
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(
+ input->info()->tensor_shape(), axis, false);
+ DataType output_data_type = (output->info()->data_type() == DataType::UNKNOWN)
+ ? DataType::S32
+ : output->info()->data_type();
+ auto_init_if_empty(*output->info(), input->info()
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
+
+ // Configure reduction operation kernels
+ _reduction_kernels_vector.resize(_num_of_stages);
+
+ _memory_group.manage(&_not_reshaped_output);
+ // Create temporary tensors
+ if (_num_of_stages == 1)
+ {
+ // Force an early initialization for int64 output type
+ TensorShape output_shape{input->info()->tensor_shape()};
+ output_shape.set(axis, 1);
+ auto_init_if_empty(*_not_reshaped_output.info(), input->info()
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
+ _not_reshaped_output.info()->set_tensor_shape(output_shape);
+ _reduction_kernels_vector[0].configure(input, nullptr, &_not_reshaped_output, axis, op);
+ }
+ else
+ {
+ _results_vector.resize(_num_of_stages - 1);
+ TensorShape shape{input->info()->tensor_shape()};
+ for (unsigned int i = 0; i < _num_of_stages - 1; i++)
+ {
+ shape.set(0, ceil(shape.x() / 128.f));
+ _results_vector[i].allocator()->init(
+ input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type));
+ }
+
+ // Apply ReductionOperation only on first kernel
+ _memory_group.manage(&_results_vector[0]);
+ _reduction_kernels_vector[0].configure(input, nullptr, &_results_vector[0], axis, op);
+
+ // Apply ReductionOperation on intermediate stages
+ for (unsigned int i = 1; i < _num_of_stages - 1; ++i)
+ {
+ _memory_group.manage(&_results_vector[i]);
+ _reduction_kernels_vector[i].configure(input, &_results_vector[i - 1], &_results_vector[i],
+ axis, op);
+ _results_vector[i - 1].allocator()->allocate();
+ }
+
+ // Apply ReductionOperation on the last stage
+ const unsigned int last_stage = _num_of_stages - 1;
+ _reduction_kernels_vector[last_stage].configure(input, &_results_vector[last_stage - 1],
+ &_not_reshaped_output, axis, op);
+ _results_vector[last_stage - 1].allocator()->allocate();
+ }
+ _reshape_kernel.configure(CLKernelLibrary::get().get_compile_context(), &_not_reshaped_output,
+ output);
+ _not_reshaped_output.allocator()->allocate();
+}
+
+void CLArgMinMaxLayerEx::run()
+{
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ for (unsigned int i = 0; i < _num_of_stages; ++i)
+ {
+ CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
+ }
+ _reshape_kernel.run();
+}
+} // namespace arm_compute
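A minimal usage sketch of the new CLArgMinMaxLayerEx function, assuming only the configure()/run() entry points shown in the hunk above; the tensor shapes, the fill step, and the helper function name are illustrative and not part of the patch:

  #include "arm_compute/runtime/CL/CLScheduler.h"
  #include "arm_compute/runtime/CL/CLTensor.h"
  #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h"

  using namespace arm_compute;

  void argminmax_example()
  {
    CLScheduler::get().default_init();

    // 16 values per row, 4 rows; reduce along axis 0 without keeping the reduced dimension
    CLTensor input, output;
    input.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::S32));

    CLArgMinMaxLayerEx argminmax;
    argminmax.configure(&input, /*axis=*/0, &output, ReductionOperation::ARG_IDX_MAX);

    input.allocator()->allocate();
    output.allocator()->allocate();
    // ... map input and fill it here ...
    argminmax.run();
    CLScheduler::get().sync();
  }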
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
index e5122ab8f..31c96b080 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
@@ -42,13 +42,14 @@
#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
using namespace arm_compute;
void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
BinaryLogicalOperation op)
{
- auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+ auto k = std::make_unique<CLBinaryLogicalOpKernel>();
k->configure(input1, input2, output, op);
_kernel = std::move(k);
@@ -57,7 +58,7 @@ void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTenso
ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
if (broadcasted_info->info()->dimension(0) == 1)
{
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
diff --git a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp
index 768c15b41..96f9c17a9 100644
--- a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp
@@ -15,7 +15,7 @@
*/
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,17 +38,15 @@
* SOFTWARE.
*/
-#include "arm_compute/runtime/CPP/functions/CPPOneHotEx.h"
+#include "arm_compute/runtime/CL/functions/CLCastBool.h"
-#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
-#include "support/MemorySupport.h"
+#include "arm_compute/core/CL/kernels/CLCastBoolKernel.h"
using namespace arm_compute;
-void CPPOneHotEx::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
- const ITensor *off_value, ITensor *output, const int axis)
+void CLCastBool::configure(ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CPPOneHotKernelEx>();
- k->configure(indices, depth, on_value, off_value, output, axis);
+ auto k = std::make_unique<CLCastBoolKernel>();
+ k->configure(input, output);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
index 3dede0562..464f60dee 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
@@ -45,6 +45,8 @@
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
#include <memory>
#include <tuple>
@@ -53,16 +55,10 @@ namespace arm_compute
using namespace arm_compute::misc::shape_calculator;
CLDirectTransposeConvLayer::CLDirectTransposeConvLayer(
- std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _scale_f(),
- _conv_f(),
- _flip_weights(),
- _scaled_output(),
- _original_weights(nullptr),
- _weights_flipped(),
- _flip_axis(),
- _is_prepared(false)
+ std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)), _scale_f(), _conv_f(), _flip_weights(),
+ _scaled_output(), _original_weights(nullptr), _weights_flipped(), _flip_axis(),
+ _is_prepared(false)
{
}
@@ -74,7 +70,7 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
+ input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
const DataLayout data_layout = input->data_layout();
@@ -86,8 +82,8 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
auto out_dims = transposeconv_output_dimensions(
- input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
- weights->dimension(idx_h), info, invalid_right, invalid_bottom);
+ input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
+ weights->dimension(idx_h), info, invalid_right, invalid_bottom);
const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
@@ -117,19 +113,19 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
unsigned int pad_right = 0;
unsigned int pad_top = 0;
unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
+ const TensorShape scale_out_shape =
+ compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right,
+ invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(scale_out_shape)
- .set_data_layout(data_layout));
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(scale_out_shape)
+ .set_data_layout(data_layout));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info));
- ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, weights_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, weights_info));
return Status{};
}
@@ -171,22 +167,22 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte
_flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(idx_w), input->info()->dimension(idx_h),
- weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
- invalid_bottom);
+ input->info()->dimension(idx_w), input->info()->dimension(idx_h),
+ weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+ invalid_bottom);
const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
// Output auto initialization if not yet initialized
auto_init_if_empty(
- *output->info(),
- input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
+ *output->info(),
+ input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
+ input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info,
+ invalid_right, invalid_bottom));
_is_prepared = weights_info.retain_internal_weights();
@@ -195,8 +191,8 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte
// Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
// to match output shape
const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
input->info()->quantization_info());
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
index ae9d8afc6..003ec8042 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
@@ -39,7 +39,6 @@
*/
#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
-
#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
using namespace arm_compute;
@@ -47,7 +46,7 @@ using namespace arm_compute;
void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
const ICLTensor *lookups)
{
- auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+ auto k = std::make_unique<CLEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
index 01989461e..af936e873 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
@@ -45,7 +45,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
#include <algorithm>
@@ -60,7 +59,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
ARM_COMPUTE_UNUSED(weights);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
+ CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
@@ -68,7 +67,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = std::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
@@ -80,12 +79,12 @@ Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *in
}
CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
- _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
- _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
- _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
- _original_weights(nullptr)
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
+ _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
+ _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
+ _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
+ _original_weights(nullptr)
{
}
void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights,
@@ -107,8 +106,8 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
_accumulate_biases = false;
@@ -140,10 +139,10 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
bool is_fc_after_conv = false;
if (is_batched_fc_layer)
{
- is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -158,28 +157,28 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
{
// Reshape the weights
_reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
_reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output);
weights_to_use = &_reshape_weights_output;
}
// Extract scale factor
_scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
_memory_group.manage(&_scale_factor);
_scale_factor_kernel.configure(input, &_scale_factor);
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_memory_group.manage(&_quantized_input);
_quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
// GEMMLowp
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
_memory_group.manage(&_gemmlowp_output);
configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output,
fc_info.retain_internal_weights);
@@ -209,15 +208,15 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
const GPUTarget gpu_target = CLScheduler::get().target();
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
+ CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
}
// With the Fully Connected layer we can have 4 different cases:
@@ -247,33 +246,32 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
// Validate Scale factor kernel
const ITensorInfo &scale_factor =
- TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
+ TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
// Validate quantization symm8 kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
ARM_COMPUTE_RETURN_ON_ERROR(
- CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
+ CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
// Fully Connected layer after a Fully Connected Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate matrix multiply kernel
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
// Multiply scale
ARM_COMPUTE_RETURN_ON_ERROR(
- CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
+ CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
index 2ff4b9659..c6a88d340 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
@@ -42,11 +42,11 @@
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/MemorySupport.h"
+
+#include "support/Cast.h"
#include <algorithm>
@@ -79,7 +79,7 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
int output_multiplier = 0;
int output_shift = 0;
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(
- multiplier, &output_multiplier, &output_shift));
+ multiplier, &output_multiplier, &output_shift));
// Set the GEMMLowp output stage info
gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
@@ -99,7 +99,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
{
GEMMLowpOutputStageInfo gemmlowp_output_stage;
ARM_COMPUTE_RETURN_ON_ERROR(
- construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
+ construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
false, // is_b_reshaped
@@ -125,14 +125,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
- gemm_info));
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
+ gemm_info));
}
else
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
+ CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
}
return Status{};
@@ -141,7 +141,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = std::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
@@ -154,12 +154,12 @@ Status CLFullyConnectedLayerReshapeWeightsEx::validate(const ITensorInfo *input,
CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager,
IWeightsManager *weights_manager)
- : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
- _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
- _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
- _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
- _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
- _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
+ _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
+ _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
+ _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
+ _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
+ _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
{
}
void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights,
@@ -190,9 +190,9 @@ void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTens
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, bias, output, gemm_info);
@@ -214,8 +214,8 @@ void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const IC
const FullyConnectedLayerInfo &fc_info)
{
ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
// If the fully connected layer is called after a convolution layer, the input tensor must be
// linearized
@@ -223,11 +223,11 @@ void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const IC
// Initialize output tensor for flatten
TensorShape shape_flatten = compute_flatten_shape(input->info());
_flatten_output.allocator()->init(input->info()
- ->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(shape_flatten)
- .set_data_layout(DataLayout::NCHW));
+ ->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(shape_flatten)
+ .set_data_layout(DataLayout::NCHW));
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
@@ -258,8 +258,8 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
@@ -285,10 +285,10 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -302,7 +302,7 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
{
_reshape_weights_managed_function.configure(weights);
weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->acquire(weights, &_reshape_weights_managed_function));
+ _weights_manager->acquire(weights, &_reshape_weights_managed_function));
}
else
{
@@ -320,7 +320,7 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
_convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(),
fc_info.weights_trained_layout);
weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->acquire(weights, &_convert_weights_managed));
+ _weights_manager->acquire(weights, &_convert_weights_managed));
}
else
{
@@ -359,16 +359,16 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
bool is_fc_after_conv = true;
const ITensorInfo &flatten_input = TensorInfo(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(compute_flatten_shape(input))
- .set_data_layout(DataLayout::NCHW));
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(compute_flatten_shape(input))
+ .set_data_layout(DataLayout::NCHW));
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
// With the Fully Connected layer we can have 4 different cases:
// 1) Convolution layer -> Fully Connected layer without batches
@@ -396,7 +396,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
+ CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -404,7 +404,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate convert weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
weights_to_use = &converted_weights;
}
@@ -412,8 +412,8 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Fully Connected layer after a Convolution Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input));
@@ -427,7 +427,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
// Validate matrix multiply kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
+ validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
return Status{};
}
@@ -457,7 +457,7 @@ void CLFullyConnectedLayerEx::run()
if (_weights_manager && _weights_manager->are_weights_managed(cur_weights))
{
_original_weights = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
+ _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
}
else
{
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
index 157b4d977..cda784541 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
@@ -19,6 +19,7 @@
#include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h>
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
@@ -41,7 +42,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
// reshape
auto_init_if_empty(*_cl_buffer.info(),
_input->info()->clone()->set_tensor_shape(reshape).set_data_layout(
- _input->info()->data_layout()));
+ _input->info()->data_layout()));
_cl_reshape.configure(_input, &_cl_buffer);
input_to_use = &_cl_buffer;
}
@@ -57,7 +58,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
{
bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
input->info()->data_type() == DataType::F16) &&
- (weights->info()->data_type() == DataType::S8 ||
+ (weights->info()->data_type() == DataType::QSYMM8 ||
weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
@@ -81,7 +82,6 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
{
throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type");
}
-
}();
if (_needs_reshape)
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp
new file mode 100644
index 000000000..cd7409417
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMMatrixAccumulateBiasesKernel.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "support/StringSupport.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(accum);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() != 1);
+
+ return Status{};
+}
+
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases, GPUTarget gpu_target,
+ unsigned int &num_elems_processed_per_iteration)
+{
+ // Select the vector size to use (8 for Bifrost; 16 for Midgard).
+ bool is_gpu_bifrost =
+ gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G76, GPUTarget::G51,
+ GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::G52, GPUTarget::G52LIT);
+ num_elems_processed_per_iteration = is_gpu_bifrost ? 8 : 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+ AccessWindowStatic biases_access(
+ biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration),
+ biases->dimension(1));
+ AccessWindowHorizontal accum_access(accum, 0, num_elems_processed_per_iteration);
+
+ bool window_changed = update_window_and_padding(win, biases_access, accum_access);
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+CLGEMMMatrixAccumulateBiasesKernel::CLGEMMMatrixAccumulateBiasesKernel()
+ : _accum(nullptr), _biases(nullptr)
+{
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), accum, biases);
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *accum, const ICLTensor *biases)
+{
+ ARM_COMPUTE_UNUSED(compile_context);
+ // Perform validate step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
+
+ _biases = biases;
+ _accum = accum;
+
+ // Get the target gpu
+ GPUTarget gpu_target = get_target();
+ unsigned int vector_size = 0;
+
+ // Configure kernel window
+ auto win_config =
+ validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Add build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type()));
+ build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("gemm_accumulate_biases", build_opts.options()));
+}
+
+Status CLGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum,
+ const ITensorInfo *biases, GPUTarget gpu_target)
+{
+ unsigned int num_elems_processed_per_iteration = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(),
+ biases->clone().get(), gpu_target,
+ num_elems_processed_per_iteration)
+ .first);
+
+ return Status{};
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
+
+ Window accum_slice = window.first_slice_window_2D();
+
+ Window biases_slice(accum_slice);
+ biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ // Run kernel
+ do
+ {
+ // Set arguments
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _accum, accum_slice);
+ add_1D_tensor_argument(idx, _biases, biases_slice);
+
+ enqueue(queue, *this, accum_slice, lws_hint());
+ } while (window.slide_window_slice_2D(accum_slice));
+}
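A short sketch of how this kernel is typically driven, assuming the configure()/validate() signatures shown above and two already-allocated CLTensor objects named accum and biases (illustrative, not part of the patch):

  // accum: F32 matrix, biases: F32 vector with one value per accum column
  CLGEMMMatrixAccumulateBiasesKernel accumulate_biases;
  ARM_COMPUTE_ERROR_THROW_ON(CLGEMMMatrixAccumulateBiasesKernel::validate(
      accum.info(), biases.info(), CLScheduler::get().target()));
  accumulate_biases.configure(&accum, &biases);
  // Adds the bias vector in place to every row of accum
  CLScheduler::get().enqueue(accumulate_biases);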
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
index e0b833b04..f380e3e2c 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
@@ -41,6 +41,8 @@
#include "arm_compute/runtime/CL/functions/CLGatherEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
+
#include "arm_compute/core/CL/kernels/CLGatherExKernel.h"
using namespace arm_compute;
@@ -48,7 +50,7 @@ using namespace arm_compute;
void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output,
int axis)
{
- auto k = support::cpp14::make_unique<CLGatherExKernel>();
+ auto k = std::make_unique<CLGatherExKernel>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
index 65b89a389..9896abd4b 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
@@ -47,7 +47,7 @@ using namespace arm_compute;
void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
- auto k = support::cpp14::make_unique<CLHashtableLookupKernel>();
+ auto k = std::make_unique<CLHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
index 5a7e40839..ca45a57f8 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
@@ -50,7 +50,7 @@ CLInstanceNormalizationLayerEx::CLInstanceNormalizationLayerEx() {}
void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
ICLTensor *gamma, ICLTensor *beta, float epsilon)
{
- auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
+ auto k = std::make_unique<CLInstanceNormalizationLayerKernelEx>();
k->configure(input, output, gamma, beta, epsilon);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
index 28e5bc0da..2bdc451b3 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
@@ -46,7 +46,7 @@ using namespace arm_compute;
void CLNeg::configure(ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>();
+ auto k = std::make_unique<CLNegKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp
new file mode 100644
index 000000000..759a19ff3
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLOneHot.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+CLOneHot::CLOneHot() : _memset_kernel(), _onehot_kernel(), _has_to_memset(false) {}
+void CLOneHot::configure(const ICLTensor *indices, const ICLTensor *on_value,
+ const ICLTensor *off_value, ICLTensor *output, int depth, int axis)
+{
+ _onehot_kernel.configure(indices, on_value, off_value, output, depth, axis);
+}
+void CLOneHot::configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output,
+ PixelValue off_value, int depth, int axis)
+{
+ _has_to_memset = true;
+ _memset_kernel.configure(output, off_value);
+ _onehot_kernel.configure(indices, on_value, output, depth, axis);
+}
+Status CLOneHot::validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *off_value, const ITensorInfo *output, int depth,
+ int axis)
+{
+ return CLOneHotKernel::validate(indices, on_value, off_value, output, depth, axis);
+}
+void CLOneHot::run()
+{
+ if (_has_to_memset)
+ {
+ CLScheduler::get().enqueue(_memset_kernel, true);
+ }
+
+ CLScheduler::get().enqueue(_onehot_kernel, false);
+}
+} // namespace arm_compute
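A minimal sketch of the CLOneHot front end added here, assuming the PixelValue overload of configure() shown above and pre-initialized tensors named indices, on_value and output; the depth and axis values are illustrative:

  CLOneHot onehot;
  // Off value is supplied as a PixelValue, so run() first memsets the output to it
  onehot.configure(&indices, &on_value, &output, PixelValue(0.f), /*depth=*/10, /*axis=*/0);
  onehot.run();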
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
new file mode 100644
index 000000000..4d940e966
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+
+namespace arm_compute
+{
+CLPadLayerEx::CLPadLayerEx()
+ : _pad_kernel(std::make_unique<CLPadLayerKernelEx>()),
+ _copy_kernel(std::make_unique<opencl::kernels::ClCopyKernel>()), _perform_pad(false)
+{
+}
+
+void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value,
+ mode);
+}
+
+void CLPadLayerEx::configure(const CLCompileContext &compile_context, ICLTensor *input,
+ ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value, PaddingMode mode)
+{
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate(input->info(), output->info(), padding, constant_value, mode));
+
+ _perform_pad = std::any_of(padding.begin(), padding.end(),
+ [](PaddingInfo info) { return info.first > 0 || info.second > 0; });
+
+ if (_perform_pad)
+ {
+ _pad_kernel->configure(compile_context, input, output, padding, constant_value, mode);
+ }
+ else
+ {
+ Window copy_window = Window();
+ copy_window.use_tensor_dimensions(output->info()->tensor_shape());
+ // Copy the input to the whole output if no padding is applied
+ _copy_kernel->configure(compile_context, input->info(), output->info(), &copy_window);
+ }
+}
+Status CLPadLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value,
+ PaddingMode mode)
+{
+ bool perform_pad = std::any_of(padding.begin(), padding.end(), [](PaddingInfo info) {
+ return info.first > 0 || info.second > 0;
+ });
+
+ if (perform_pad)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLPadLayerKernelEx::validate(input, output, padding, constant_value, mode));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCopyKernel::validate(input, output));
+ }
+ return Status{};
+}
+void CLPadLayerEx::run()
+{
+ if (_perform_pad)
+ {
+ CLScheduler::get().enqueue(*_pad_kernel);
+ }
+ else
+ {
+ CLScheduler::get().enqueue(*_copy_kernel);
+ }
+}
+} // namespace arm_compute
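
A minimal sketch of how the new CLPadLayerEx might be driven. Shapes, the constant value and the padding list below are assumptions for illustration; when every entry of the padding list is zero, the function falls back to the ClCopyKernel path shown above instead of running the pad kernel.

    #include "arm_compute/core/PixelValue.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"

    using namespace arm_compute;

    void pad_example()
    {
      CLScheduler::get().default_init();

      // Assumed: pad one element on each side of dimension 0 of an 8x8 F32 tensor
      CLTensor input, output;
      input.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(10U, 8U), 1, DataType::F32));

      const PaddingList padding = {{1, 1}};

      CLPadLayerEx pad;
      pad.configure(&input, &output, padding, PixelValue(0.f), PaddingMode::CONSTANT);

      input.allocator()->allocate();
      output.allocator()->allocate();

      pad.run();
      CLScheduler::get().sync();
    }
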
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
index b198e7330..6740835a8 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
@@ -40,21 +40,20 @@
#include "arm_compute/runtime/CL/functions/CLReduceOperation.h"
-#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
using namespace arm_compute;
CLReduceOperation::CLReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
- _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
+ : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
+ _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
{
}
Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output,
const std::set<uint32_t> &axis, bool keep_dims,
- const ReduceOperation &op)
+ const ReductionOperation &op)
{
const size_t num_of_kernels = axis.size();
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
@@ -62,7 +61,7 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1);
// Create temporary tensor infos
- auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+ auto interm_tensors = std::make_unique<TensorInfo[]>(num_of_interm_tensors);
// Create intermediate tensor info
TensorShape shape{input->tensor_shape()};
@@ -92,13 +91,13 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
for (size_t i = 0; i < num_of_kernels; ++i, ++it)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
+ CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
}
if (!keep_dims)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
+ CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
}
return Status{};
@@ -106,7 +105,7 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
const std::set<uint32_t> &axis, bool keep_dims,
- ReduceOperation op)
+ ReductionOperation op)
{
ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, keep_dims, op));
@@ -125,8 +124,8 @@ void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
throw std::runtime_error("CLReduceOperation: there is no axis to reduce");
}
- _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
+ _interm_tensors = std::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _reduce_kernels = std::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
// Set a vector that is ordered ICLTensors sequentially.
std::vector<ICLTensor *> tensors;
@@ -137,7 +136,7 @@ void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
}
tensors.emplace_back(output);
- // Apply ReduceOperation on all kernels
+ // Apply ReductionOperation on all kernels
TensorShape shape{input->info()->tensor_shape()};
auto it = axis.begin();
for (size_t i = 0; i < num_of_kernels; ++i, ++it)
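
With this change the function takes the core ReductionOperation enum directly instead of the removed ReduceOperation type. A hedged call sketch under assumed shapes and a MEAN_SUM reduction (none of these values come from the patch):

    #include <set>
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLReduceOperation.h"

    using namespace arm_compute;

    void reduce_example()
    {
      CLScheduler::get().default_init();

      // Assumed: reduce axes {0, 1} of a 4x4 F32 tensor, keeping the reduced dims
      CLTensor input, output;
      input.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(1U, 1U), 1, DataType::F32));

      const std::set<uint32_t> axis{0, 1};

      // No memory manager is passed in this sketch
      CLReduceOperation reduce(nullptr);
      reduce.configure(&input, &output, axis, /*keep_dims=*/true, ReductionOperation::MEAN_SUM);

      input.allocator()->allocate();
      output.allocator()->allocate();

      reduce.run();
      CLScheduler::get().sync();
    }
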
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp
new file mode 100644
index 000000000..bca4d5cb6
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLSplitVEx.h"
+#include "support/ToolchainSupport.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include <cassert>
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ICLTensor *size_splits, const std::vector<ICLTensor *> &outputs,
+ unsigned int num_splits)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(size_splits->info()->num_dimensions() != 1,
+ "size_splits must be a 1-D tensor.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_splits != outputs.size(),
+ "Number of output tensors does not match number of splits.");
+ return Status{};
+}
+
+Status validate_slices(const ITensorInfo *input, const std::vector<ITensorInfo *> &outputs,
+ uint32_t split_dim)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+ ARM_COMPUTE_RETURN_ERROR_ON(split_dim >= input->num_dimensions());
+ ARM_COMPUTE_RETURN_ERROR_ON(outputs.size() < 2);
+
+ // Start/End coordinates
+ Coordinates start_coords;
+ Coordinates end_coords;
+ for (unsigned int d = 0; d < input->num_dimensions(); ++d)
+ {
+ end_coords.set(d, -1);
+ }
+ unsigned int axis_offset = 0;
+ // Validate output tensors
+ for (const auto &output : outputs)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+ // Get output shape
+ const TensorShape output_shape = output->tensor_shape();
+ ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() == 0);
+
+ const size_t axis_split_step = output_shape[split_dim];
+
+    // Output auto initialization if not yet initialized
+    // Output auto initialization if not yet initialized
+ TensorInfo tmp_output_info = *output->clone();
+ auto_init_if_empty(tmp_output_info,
+ input->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
+
+ // Update coordinate on axis
+ start_coords.set(split_dim, axis_offset);
+ end_coords.set(split_dim, axis_offset + axis_split_step);
+
+ ARM_COMPUTE_RETURN_ON_ERROR(CLSlice::validate(input, output, start_coords, end_coords));
+
+ axis_offset += axis_split_step;
+ }
+
+ return Status{};
+}
+
+void configure_slices(const ICLTensor *input, const std::vector<ICLTensor *> &outputs,
+ std::vector<CLSlice> &_slice_functions, uint32_t split_dim)
+{
+ unsigned int axis_offset = 0;
+ // Start/End coordinates
+ Coordinates start_coords;
+ Coordinates end_coords;
+ for (unsigned int d = 0; d < input->info()->num_dimensions(); ++d)
+ {
+ end_coords.set(d, -1);
+ }
+ int out_iter = 0;
+ for (const auto &output : outputs)
+ {
+ const TensorShape output_shape = output->info()->tensor_shape();
+ auto op_size = output_shape.total_size();
+ if (!op_size)
+ {
+ continue;
+ }
+
+ assert(op_size != 0);
+ assert(split_dim <= output_shape.num_dimensions());
+
+ const size_t axis_split_step = output_shape[split_dim];
+
+    // Output auto initialization if not yet initialized
+ TensorInfo tmp_output_info = *output->info()->clone();
+ auto_init_if_empty(
+ tmp_output_info,
+ input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
+
+ // Update coordinate on axis
+ start_coords.set(split_dim, axis_offset);
+ end_coords.set(split_dim, axis_offset + axis_split_step);
+
+ // Configure slice function
+ _slice_functions[out_iter].configure(input, output, start_coords, end_coords);
+
+ // Set valid region from shape
+ outputs[out_iter++]->info()->set_valid_region(ValidRegion(Coordinates(), output_shape));
+ axis_offset += axis_split_step;
+ }
+}
+
+} // namespace
+
+CLSplitVEx::CLSplitVEx()
+ : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions()
+{
+}
+
+void CLSplitVEx::configure(const ICLTensor *input, const ICLTensor *size_splits, uint32_t split_dim,
+ const std::vector<ICLTensor *> &outputs, unsigned int num_splits)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, size_splits);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(size_splits, outputs, num_splits));
+
+ _input = input;
+ _size_splits = size_splits;
+ _outputs = outputs;
+ _num_splits = num_splits;
+
+ // Create tensor slices
+ _slice_functions.resize(_num_splits);
+
+ // Extract output tensor info
+ std::vector<ITensorInfo *> outputs_info;
+ for (auto &&output : _outputs)
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+ outputs_info.emplace_back(output->info());
+ }
+
+ // Validate slices
+ ARM_COMPUTE_ERROR_THROW_ON(validate_slices(_input->info(), outputs_info, split_dim));
+
+ // Configure slices
+ configure_slices(_input, _outputs, _slice_functions, split_dim);
+}
+
+void CLSplitVEx::run()
+{
+ // execute the slices
+ for (unsigned i = 0; i < _outputs.size(); ++i)
+ {
+ _slice_functions[i].run();
+ }
+}
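
A minimal sketch of the new CLSplitVEx in use. The split sizes, shapes and types below are assumptions for illustration; size_splits itself is only validated to be 1-D, and the actual slice extents are taken from the output tensor shapes as in configure_slices above.

    #include <vector>
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLSplitVEx.h"

    using namespace arm_compute;

    void splitv_example()
    {
      CLScheduler::get().default_init();

      // Assumed: split an 8x2 tensor along dimension 0 into sizes {3, 5}
      CLTensor input, size_splits, out0, out1;
      input.allocator()->init(TensorInfo(TensorShape(8U, 2U), 1, DataType::F32));
      size_splits.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::S32));
      out0.allocator()->init(TensorInfo(TensorShape(3U, 2U), 1, DataType::F32));
      out1.allocator()->init(TensorInfo(TensorShape(5U, 2U), 1, DataType::F32));

      std::vector<ICLTensor *> outputs{&out0, &out1};

      CLSplitVEx splitv;
      splitv.configure(&input, &size_splits, /*split_dim=*/0, outputs, /*num_splits=*/2);

      for (auto *t : {&input, &size_splits, &out0, &out1})
        t->allocator()->allocate();

      splitv.run();
      CLScheduler::get().sync();
    }
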
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
index 3ac95a8e6..accd51302 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
@@ -49,14 +49,14 @@ namespace arm_compute
{
CLTopKV2::CLTopKV2()
- : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
- _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
- _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
- _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr),
- _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
- _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
- _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
- _reorder_negatives_kernel(), _store_kernel()*/
+ : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
+ _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
+ _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
+ _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), _p_out_key_buf(nullptr),
+ _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
+ _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
+ _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
+ _reorder_negatives_kernel(), _store_kernel()*/
{
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
index 3215d01a7..f3f093c18 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
@@ -53,7 +53,7 @@ using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;
CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_manager(std::move(memory_manager)), _function()
+ : _memory_manager(std::move(memory_manager)), _function()
{
}
@@ -79,7 +79,7 @@ void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, IC
{
case DeconvolutionMethod::DIRECT:
{
- auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>();
+ auto f = std::make_unique<CLDirectTransposeConvLayer>();
f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right,
invalid_bottom, weights_info);
_function = std::move(f);
@@ -87,7 +87,7 @@ void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, IC
}
case DeconvolutionMethod::GEMM:
{
- auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
+ auto f = std::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
f->configure(compile_context, input, weights, bias, output, deconv_info);
_function = std::move(f);
break;
@@ -105,20 +105,20 @@ Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
switch (CLTransposeConvLayer::get_deconvolution_method(
- input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
{
case DeconvolutionMethod::DIRECT:
{
// Validate direct convolution layer
ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate(
- input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
break;
}
case DeconvolutionMethod::GEMM:
{
// Validate gemm-based convolution layer
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
+ CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
break;
}
default:
@@ -130,9 +130,9 @@ Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
}
DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method(
- const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
- ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
- unsigned int invalid_bottom, const WeightsInfo &weights_info)
+ const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
+ ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
ARM_COMPUTE_UNUSED(output, bias, weights_info);
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
index 2fc94b267..e6b7329d1 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
@@ -38,11 +38,10 @@
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
+#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include "arm_compute/core/ITensor.h"
-#include "support/MemorySupport.h"
#include <utility>
@@ -53,7 +52,7 @@ template <BinaryLogicalOperation COP>
void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2,
ITensor *output)
{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = std::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(COP, input1, input2, output);
_kernel = std::move(k);
}
@@ -69,7 +68,7 @@ Status NEBinaryLogicalOperationStatic<COP>::validate(const ITensorInfo *input1,
void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output,
BinaryLogicalOperation op)
{
- auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = std::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(op, input1, input2, output);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp
new file mode 100644
index 000000000..f6eec2603
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NECastBool.h"
+
+#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h"
+
+using namespace arm_compute;
+
+void NECastBool::configure(const ITensor *input, ITensor *output)
+{
+ auto k = std::make_unique<NECastBoolKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
+
+Status NECastBool::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+ return NECastBoolKernel::validate(input, output);
+}
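
A minimal sketch of the new NECastBool wrapper. The element count and the F32 destination type are assumptions; the U8 input is assumed to hold boolean values to be cast.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NECastBool.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void cast_bool_example()
    {
      // Assumed: 16-element boolean (U8) input cast to F32
      Tensor input, output;
      input.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::U8));
      output.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

      NECastBool cast;
      cast.configure(&input, &output);

      input.allocator()->allocate();
      output.allocator()->allocate();

      cast.run();
    }
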
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
index e0ab3e025..99fc5c579 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
@@ -41,13 +41,12 @@
#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups)
{
- auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>();
+ auto k = std::make_unique<NEEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
index a123439d9..fbd88fff0 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
@@ -58,7 +58,7 @@ namespace
Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
+ NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
@@ -66,7 +66,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
{
- auto k = support::cpp14::make_unique<NETransposeKernel>();
+ auto k = std::make_unique<NETransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
@@ -78,11 +78,11 @@ Status NEFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *in
}
NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
- _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
- _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
- _accumulate_biases(false), _is_prepared(false)
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
+ _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
+ _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
+ _accumulate_biases(false), _is_prepared(false)
{
}
@@ -103,8 +103,8 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
_accumulate_biases = false;
@@ -132,10 +132,10 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
bool _is_fc_after_conv;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -150,23 +150,23 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
{
// Reshape the weights
_reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
_reshape_weights_function.configure(weights_to_use, &_reshape_weights_output);
weights_to_use = &_reshape_weights_output;
}
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
_quant_input_kernel.configure(input, &_quantized_input, &_scale_factor);
// GEMM
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output);
// Multiply scale
@@ -195,8 +195,8 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr)
@@ -217,7 +217,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -225,20 +225,19 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate quantization kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32);
ARM_COMPUTE_RETURN_ON_ERROR(
- NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
+ NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Validate matrix multiply kernel
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
- &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
+ &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
index cb7557a5a..758f7dc59 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
@@ -50,7 +50,8 @@
#include <algorithm>
#include <cmath>
-using namespace arm_compute;
+namespace arm_compute
+{
using namespace arm_compute::misc::shape_calculator;
namespace
@@ -69,14 +70,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(
- &input, &weights, nullptr, &output, 1.f, 0.0f,
- GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ NEGEMM::validate(&input, &weights, nullptr, &output, 1.f, 0.0f,
+ GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
}
return Status{};
@@ -84,12 +85,12 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
} // namespace
NEFullyConnectedLayerEx::NEFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
- _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
- _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(),
- _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr),
- _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false),
- _accumulate_biases(false), _is_quantized(false), _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
+ _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
+ _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(), _converted_weights_output(),
+ _reshape_weights_output(), _original_weights(nullptr), _are_weights_converted(true),
+ _are_weights_reshaped(false), _is_fc_after_conv(false), _accumulate_biases(false),
+ _is_quantized(false), _is_prepared(false)
{
}
@@ -105,9 +106,9 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -129,8 +130,8 @@ void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITen
ITensor *output)
{
ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
// If the fully connected layer is called after a convolution layer, the input tensor must be
// linearized
@@ -138,8 +139,7 @@ void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITen
// Initialize output tensor for flatten
TensorShape shape_flatten = compute_flatten_shape(input->info());
_flatten_output.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- shape_flatten));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
@@ -165,12 +165,11 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
const ITensor *biases, ITensor *output,
FullyConnectedLayerInfo fc_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
// Perform validate step
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
@@ -183,8 +182,7 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
if (_is_quantized)
{
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
}
// Configure accumulate biases kernel for non quantized asymmetric types
@@ -208,10 +206,10 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -284,16 +282,16 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
const ITensorInfo &flatten_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_flatten_shape(input)));
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_flatten_shape(input)));
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr && !is_quantized)
@@ -330,7 +328,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
+ NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -338,7 +336,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate convert weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
weights_to_use = &converted_weights;
}
@@ -346,11 +344,11 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Fully Connected layer after a Convolution Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
input_to_use = &flatten_input;
}
else
@@ -365,7 +363,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
if (is_quantized)
{
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(
- &gemmlowp_output, biases, output));
+ &gemmlowp_output, biases, output));
}
return Status{};
@@ -376,9 +374,13 @@ void NEFullyConnectedLayerEx::run()
if (!_is_prepared)
{
if (!_are_weights_reshaped)
+ {
_reshape_weights_output.allocator()->allocate();
+ }
if (!_are_weights_converted)
+ {
_converted_weights_output.allocator()->allocate();
+ }
_is_prepared = true;
}
@@ -409,7 +411,7 @@ void NEFullyConnectedLayerEx::run()
// Linearize input if it comes from a convolutional layer
if (_is_fc_after_conv)
{
- NEScheduler::get().schedule(&_flatten_kernel, Window::DimY);
+ _flatten_kernel.run();
}
// Run matrix multiply
@@ -492,3 +494,4 @@ void NEFullyConnectedLayerEx::prepare()
}
#endif
}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
index dc6c78478..2199839fb 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
@@ -19,6 +19,8 @@
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h>
+#include "src/core/helpers/AutoConfiguration.h"
+#include <cassert>
using namespace arm_compute;
@@ -56,7 +58,7 @@ void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input
assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
bool is_hybrid = input->info()->data_type() == DataType::F32 &&
- (weights->info()->data_type() == DataType::S8 ||
+ (weights->info()->data_type() == DataType::QSYMM8 ||
weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
index 433c35d58..e5607ab9a 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
@@ -41,7 +41,6 @@
#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "support/MemorySupport.h"
#include <utility>
@@ -49,7 +48,7 @@ namespace arm_compute
{
void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis)
{
- auto k = support::cpp14::make_unique<NEGatherKernelEx>();
+ auto k = std::make_unique<NEGatherKernelEx>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
index 52d58accf..7cc6c89e7 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
@@ -41,14 +41,13 @@
#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
-#include "support/MemorySupport.h"
using namespace arm_compute;
void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input,
ITensor *output, ITensor *hits)
{
- auto k = support::cpp14::make_unique<NEHashtableLookupKernel>();
+ auto k = std::make_unique<NEHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
index 16d74e62d..451aa0997 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
@@ -46,9 +46,9 @@
namespace arm_compute
{
NEInstanceNormalizationLayerEx::NEInstanceNormalizationLayerEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
- _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
+ _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
{
}
@@ -88,8 +88,8 @@ Status NEInstanceNormalizationLayerEx::validate(const ITensorInfo *input, const
float epsilon)
{
return NEInstanceNormalizationLayerKernelEx::validate(
- &input->clone()->set_data_layout(DataLayout::NCHW),
- &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
+ &input->clone()->set_data_layout(DataLayout::NCHW),
+ &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
}
void NEInstanceNormalizationLayerEx::run()
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp
index 2752eb6aa..e0620bad2 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp
@@ -15,7 +15,7 @@
*/
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,30 +37,23 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "arm_compute/runtime/NEON/functions/NEOneHot.h"
+#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h"
-#include "arm_compute/runtime/NEON/functions/NEActivationLayerEx.h"
-
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
-#include "arm_compute/runtime/IRuntimeContext.h"
-#include "support/MemorySupport.h"
-
+#include <utility>
namespace arm_compute
{
-NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT
- : INESimpleFunctionNoBorder(ctx)
+void NEOneHot::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
+ const ITensor *off_value, ITensor *output, int axis)
{
-}
-void NEActivationLayerEx::configure(ITensor *input, ITensor *output,
- ActivationLayerInfo activation_info)
-{
- auto k = support::cpp14::make_unique<NEActivationLayerKernelEx>();
- k->configure(input, output, activation_info);
+ auto k = std::make_unique<NEOneHotKernel>();
+ k->configure(indices, depth, on_value, off_value, output, axis);
_kernel = std::move(k);
}
-
-Status NEActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfo &act_info)
+Status NEOneHot::validate(const ITensorInfo *indices, const ITensorInfo *depth,
+ const ITensorInfo *on_value, const ITensorInfo *off_value,
+ const ITensorInfo *output, int axis)
{
- return NEActivationLayerKernelEx::validate(input, output, act_info);
+ return NEOneHotKernel::validate(indices, depth, on_value, off_value, output, axis);
}
} // namespace arm_compute
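
As with the CL variant earlier in this patch, a hedged usage sketch for NEOneHot; here depth and the on/off values are passed as scalar tensors. All shapes, types and the axis convention are illustrative assumptions, not taken from this patch.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEOneHot.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void ne_onehot_example()
    {
      // Assumed: 3 indices, depth 4 -> output of shape (4, 3); axis = -1 is taken
      // to mean the innermost dimension, matching the usual OneHot convention.
      Tensor indices, depth, on_value, off_value, output;
      indices.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::S32));
      depth.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::S32));
      on_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
      off_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32));

      NEOneHot onehot;
      onehot.configure(&indices, &depth, &on_value, &off_value, &output, /*axis=*/-1);

      for (auto *t : {&indices, &depth, &on_value, &off_value, &output})
        t->allocator()->allocate();

      onehot.run();
    }
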
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
index aedb537e9..a30c00ea1 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
@@ -40,22 +40,24 @@
#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/Tensor.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
NEReduceOperation::NEReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
+ : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
+ _reduction_ops(), _keep_dims()
{
}
Status NEReduceOperation::validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output, ReduceOperation op)
+ bool keep_dims, const ITensorInfo *output, ReductionOperation op)
{
ARM_COMPUTE_UNUSED(keep_dims);
ARM_COMPUTE_UNUSED(op);
@@ -102,7 +104,7 @@ Status NEReduceOperation::validate(const ITensorInfo *input, const Coordinates &
}
void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output, ReduceOperation op)
+ ITensor *output, ReductionOperation op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
@@ -125,7 +127,7 @@ void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_a
for (unsigned int i = 0; i < _reduction_ops; ++i)
{
TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+ i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
out_shape.set(axis_local[i], 1);
auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
index 26a887912..7a1342644 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
@@ -40,15 +40,19 @@
#include "arm_compute/runtime/NEON/functions/NEReduceSum.h"
-#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute;
NEReduceSum::NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
+ : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
+ _reduction_ops(), _keep_dims()
{
}
@@ -122,7 +126,7 @@ void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, b
for (unsigned int i = 0; i < _reduction_ops; ++i)
{
TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+ i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
out_shape.set(axis_local[i], 1);
auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
@@ -135,7 +139,7 @@ void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, b
_reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(),
input->info()->data_type(),
input->info()->quantization_info())
- .set_data_layout(input->info()->data_layout()));
+ .set_data_layout(input->info()->data_layout()));
_memory_group.manage(&_reduced_outs[i]);
_reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i],
ReductionOperation::SUM);
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
deleted file mode 100644
index 2aa0d2d4b..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-namespace
-{
-/** Define dimension to split the window
- *
- * @param[in] axis Reduction axis
- *
- * @return The dimension to split the window
- */
-size_t reduction_window_split_dimension(unsigned int axis)
-{
- switch (axis)
- {
- case 0:
- return Window::DimY;
- case 1:
- case 2:
- case 3:
- return Window::DimX;
- default:
- ARM_COMPUTE_ERROR("Unsupported reduction axis");
- }
-}
-} // namespace
-
-NEReductionOperationEx::NEReductionOperationEx()
- : _reduction_kernel(), _fill_border_kernel(), _window_split(0), _reduction_axis()
-{
-}
-
-Status NEReductionOperationEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- unsigned int axis, ReduceOperation op)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperationKernelEx::validate(input, output, axis, op));
-
- return Status{};
-}
-
-void NEReductionOperationEx::configure(ITensor *input, ITensor *output, unsigned int axis,
- ReduceOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(
- NEReductionOperationEx::validate(input->info(), output->info(), axis, op));
-
- // Configure reduction kernel
- _reduction_kernel.configure(input, output, axis, op);
- _window_split = reduction_window_split_dimension(axis);
- _reduction_axis = axis;
-
- if (axis == 0)
- {
- // Configure fill border kernel
- const BorderSize fill_border_size = _reduction_kernel.border_size();
- PixelValue pixelValue;
- switch (op)
- {
- case ReduceOperation::MIN:
- {
- switch (input->info()->data_type())
- {
- case DataType::F32:
- {
- pixelValue = PixelValue(std::numeric_limits<float>::max());
- break;
- }
- case DataType::F16:
- {
- pixelValue = PixelValue(static_cast<half>(65504.0f));
- break;
- }
- case DataType::QASYMM8:
- {
- pixelValue =
- PixelValue(255, input->info()->data_type(), input->info()->quantization_info());
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported DataType");
- }
- }
- break;
- }
- case ReduceOperation::MAX:
- {
- switch (input->info()->data_type())
- {
- case DataType::F32:
- {
- pixelValue = PixelValue(-std::numeric_limits<float>::max());
- break;
- }
- case DataType::F16:
- {
- pixelValue = PixelValue(static_cast<half>(-65504.0f));
- break;
- }
- case DataType::QASYMM8:
- {
- pixelValue =
- PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported DataType");
- }
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Reduction Operation unsupported");
- }
- _fill_border_kernel.configure(input, fill_border_size, BorderMode::CONSTANT, pixelValue);
- }
-}
-
-void NEReductionOperationEx::run()
-{
- if (_reduction_axis == 0)
- {
- NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY);
- }
- NEScheduler::get().schedule(&_reduction_kernel, _window_split);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
index aa165cc15..4675121b2 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
@@ -44,6 +44,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/helpers/AutoConfiguration.h"
using namespace arm_compute::misc::shape_calculator;
@@ -51,17 +52,9 @@ namespace arm_compute
{
NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _conv_f(),
- _upsample_f(),
- _flip_weights(),
- _scaled_output(),
- _weights_flipped(),
- _flip_axis(),
- _original_weights(nullptr),
- _input(nullptr),
- _info(),
- _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _conv_f(), _upsample_f(), _flip_weights(),
+ _scaled_output(), _weights_flipped(), _flip_axis(), _original_weights(nullptr), _input(nullptr),
+ _info(), _is_prepared(false)
{
}
@@ -76,15 +69,15 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
const unsigned int width_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
const unsigned int height_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx));
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1);
auto out_dims = transposeconv_output_dimensions(
- input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
- weights->dimension(height_idx), info, invalid_right, invalid_bottom);
+ input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
+ weights->dimension(height_idx), info, invalid_right, invalid_bottom);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
if (bias != nullptr)
@@ -117,24 +110,24 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
unsigned int pad_right = 0;
unsigned int pad_top = 0;
unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
+ const TensorShape scale_out_shape =
+ compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right,
+ invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(
- input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
+ input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const unsigned int batches_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
const unsigned int channel_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) !=
scale_out_info.dimension(batches_idx));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) !=
scale_out_info.dimension(channel_idx));
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, WeightsInfo()));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo()));
return Status{};
}
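(Aside, not part of the patch.) The reformatted validate() keeps the usual transposed-convolution decomposition: the input is zero-stuffed to an upsampled intermediate (compute_transposeconv_upsampled_shape) and the result is checked against an ordinary NEConvolutionLayer with stride 1 and CEIL rounding. A rough standalone sketch of the size arithmetic this relies on; the function names below are hypothetical, and the real helpers additionally handle asymmetric padding and the invalid_right/invalid_bottom trims:

    // Textbook transposed-convolution size relation (illustrative only).
    struct Dims { unsigned w; unsigned h; };

    // out = stride * (in - 1) + kernel - 2 * pad, per spatial dimension.
    Dims transposed_conv_output(Dims in, Dims kernel, unsigned stride_x, unsigned stride_y,
                                unsigned pad_x, unsigned pad_y)
    {
      return {stride_x * (in.w - 1) + kernel.w - 2 * pad_x,
              stride_y * (in.h - 1) + kernel.h - 2 * pad_y};
    }

    // A stride-1 "valid" convolution shrinks each dimension by (kernel - 1),
    // so the zero-stuffed intermediate must exceed the output by that amount.
    Dims upsampled_for_unit_stride_conv(Dims out, Dims kernel)
    {
      return {out.w + kernel.w - 1, out.h + kernel.h - 1};
    }
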
@@ -146,21 +139,21 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
- input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
+ input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
const DataLayout data_layout = input->info()->data_layout();
const unsigned int width_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const unsigned int height_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(width_idx), input->info()->dimension(height_idx),
- weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
- invalid_right, invalid_bottom);
+ input->info()->dimension(width_idx), input->info()->dimension(height_idx),
+ weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
+ invalid_right, invalid_bottom);
const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
_input = input;
_original_weights = weights;
@@ -188,8 +181,8 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
DimensionRoundingType::FLOOR);