summaryrefslogtreecommitdiff
path: root/compute/ARMComputeEx/src/runtime/CL/functions
diff options
context:
space:
mode:
Diffstat (limited to 'compute/ARMComputeEx/src/runtime/CL/functions')
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp144
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp2
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp52
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp52
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp267
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp2
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp4
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp180
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp2
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp2
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp2
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp63
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp163
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp8
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp52
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp250
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp92
19 files changed, 376 insertions, 993 deletions
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp
deleted file mode 100644
index 2d379cf36..000000000
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLArgOperation.h"
-
-#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-namespace arm_compute
-{
-
-CLArgOperation::CLArgOperation()
-{
- // DO NOTHING
-}
-
-void CLArgOperation::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis,
- ArgOperation op)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), axis, output->info(), op));
- _input = input;
- _output = output;
- _axis = axis;
- _arg_op = op;
- // NOTE The argminmax_axis must have no duplication.
- _num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = _num_of_kernels - 1;
-
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _argop_kernels =
- arm_compute::support::cpp14::make_unique<CLArgOperationKernel[]>(_num_of_kernels);
-
- TensorShape shape{input->info()->tensor_shape()};
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(_axis[i], 1);
- _interm_tensors[i].allocator()->init(
- TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())
- .set_data_layout(input->info()->data_layout()));
- _interm_tensors[i].allocator()->allocate();
- }
-
- // Set a vector that is ordered ICLTensors sequentially.
- std::vector<ICLTensor *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(_interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Apply ArgMinMax on all kernels
- for (size_t i = 0; i < _num_of_kernels; i++)
- {
- _argop_kernels[i].configure(tensors[i], tensors[i + 1], _axis[i], op);
- }
-}
-
-Status CLArgOperation::validate(const ITensorInfo *input, const std::vector<uint32_t> &axis,
- const ITensorInfo *output, ArgOperation op)
-{
- const size_t num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = num_of_kernels - 1;
-
- // Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
-
- // Create intermediate tensor info
- TensorShape shape{input->tensor_shape()};
-
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(axis[i], 1);
- interm_tensors[i].set_data_type(input->data_type());
- interm_tensors[i].set_tensor_shape(shape);
- interm_tensors[i].set_num_channels(input->num_channels());
- }
-
- // Set a vector that is ordered ITensorInfo sequentially.
- std::vector<const ITensorInfo *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Validate argminmax only on all kernels
- for (size_t i = 0; i < num_of_kernels; i++)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLArgOperationKernel::validate(tensors[i], tensors[i + 1], axis[i], op));
- }
-
- return Status{};
-}
-
-void CLArgOperation::run()
-{
- for (size_t i = 0; i < _num_of_kernels; ++i)
- {
- CLScheduler::get().enqueue(_argop_kernels[i]);
- }
-}
-
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
index 92ee69a36..e5122ab8f 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
@@ -48,7 +48,7 @@ using namespace arm_compute;
void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
BinaryLogicalOperation op)
{
- auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+ auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
k->configure(input1, input2, output, op);
_kernel = std::move(k);
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
deleted file mode 100644
index b3118f39e..000000000
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLCast.h"
-
-#include "arm_compute/core/CL/kernels/CLCastKernel.h"
-
-using namespace arm_compute;
-
-void CLCast::configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>();
- k->configure(input, output, input_subtype);
- _kernel = std::move(k);
-}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
deleted file mode 100644
index db662505a..000000000
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
-
-using namespace arm_compute;
-
-void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
new file mode 100644
index 000000000..3dede0562
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/UtilsEx.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <memory>
+#include <tuple>
+
+namespace arm_compute
+{
+using namespace arm_compute::misc::shape_calculator;
+
+CLDirectTransposeConvLayer::CLDirectTransposeConvLayer(
+ std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _scale_f(),
+ _conv_f(),
+ _flip_weights(),
+ _scaled_output(),
+ _original_weights(nullptr),
+ _weights_flipped(),
+ _flip_axis(),
+ _is_prepared(false)
+{
+}
+
+Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
+ const DataLayout data_layout = input->data_layout();
+
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h));
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
+
+ auto out_dims = transposeconv_output_dimensions(
+ input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
+ weights->dimension(idx_h), info, invalid_right, invalid_bottom);
+
+ const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
+
+ if (bias != nullptr)
+ {
+ if (is_data_type_quantized_asymmetric(input->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias);
+ }
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w],
+ "Output's width is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h],
+ "Output's height is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c],
+ "Output's depth is invalid.");
+
+ unsigned int pad_left = 0;
+ unsigned int pad_right = 0;
+ unsigned int pad_top = 0;
+ unsigned int pad_bottom = 0;
+ const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+ *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
+ pad_bottom);
+ TensorInfo scale_out_info(input->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(scale_out_shape)
+ .set_data_layout(data_layout));
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
+ conv_info, weights_info));
+
+ return Status{};
+}
+
+void CLDirectTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, info,
+ invalid_right, invalid_bottom, weights_info);
+}
+
+void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_context,
+ ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ unsigned int pad_left = 0;
+ unsigned int pad_right = 0;
+ unsigned int pad_top = 0;
+ unsigned int pad_bottom = 0;
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+
+ const DataLayout data_layout = input->info()->data_layout();
+
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ _original_weights = weights;
+ _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+ _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
+
+ auto out_dims = transposeconv_output_dimensions(
+ input->info()->dimension(idx_w), input->info()->dimension(idx_h),
+ weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+ invalid_bottom);
+
+ const TensorShape output_shape =
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(
+ *output->info(),
+ input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate(
+ input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
+
+ _is_prepared = weights_info.retain_internal_weights();
+
+ _memory_group.manage(&_scaled_output);
+
+ // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
+ // to match output shape
+ const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
+ input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+
+ // configure scale function
+ const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+ DimensionRoundingType::FLOOR);
+ _scale_f.configure(input, &_scaled_output, upsample_info);
+
+ // Setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ _conv_f.configure(compile_context, &_scaled_output, &_weights_flipped, bias, output, conv_info,
+ weights_info);
+ _scaled_output.allocator()->allocate();
+
+ // Setup flip axis data
+ _flip_axis.allocator()->allocate();
+ _flip_axis.map(true);
+ auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
+ if (weights->info()->data_layout() == DataLayout::NHWC)
+ {
+ axis_data[0] = 1;
+ axis_data[1] = 2;
+ }
+ else
+ {
+ axis_data[0] = 0;
+ axis_data[1] = 1;
+ }
+ _flip_axis.unmap();
+}
+
+void CLDirectTransposeConvLayer::run()
+{
+ prepare();
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ _scale_f.run();
+ _conv_f.run();
+}
+
+void CLDirectTransposeConvLayer::prepare()
+{
+ if (!_is_prepared)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+ // Run weights flipping and mark original weights tensor as unused
+ _weights_flipped.allocator()->allocate();
+ _flip_weights.run();
+ _original_weights->mark_as_unused();
+
+ // Prepare convolution
+ _conv_f.prepare();
+
+ // Free flipped weights
+ if (!_weights_flipped.is_used())
+ {
+ _weights_flipped.allocator()->free();
+ }
+
+ _is_prepared = true;
+ }
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
index 3d9a28a48..ae9d8afc6 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
@@ -47,7 +47,7 @@ using namespace arm_compute;
void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
const ICLTensor *lookups)
{
- auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+ auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
index f098832b0..01989461e 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
@@ -45,7 +45,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <algorithm>
@@ -60,7 +60,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
ARM_COMPUTE_UNUSED(weights);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output));
+ CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
@@ -68,7 +68,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = support::cpp14::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
@@ -172,7 +172,8 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_memory_group.manage(&_quantized_input);
_quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
@@ -199,7 +200,7 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
@@ -256,8 +257,9 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
// Validate quantization symm8 kernel
- const ITensorInfo &quantized_input = TensorInfo(
- input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ const ITensorInfo &quantized_input =
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
ARM_COMPUTE_RETURN_ON_ERROR(
CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
index 63e291b36..2ff4b9659 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
@@ -46,7 +46,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <algorithm>
@@ -141,7 +141,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = support::cpp14::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
index 9aebc473e..157b4d977 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
@@ -53,18 +53,21 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
fc->configure(input_to_use, _weights, _biases, _output);
return std::unique_ptr<arm_compute::IFunction>(fc);
}
- else
+ else if (kernel_type == KernelType::PREPROCESSED_WEIGHTS)
{
- assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
-
bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
input->info()->data_type() == DataType::F16) &&
- weights->info()->data_type() == DataType::S8;
+ (weights->info()->data_type() == DataType::S8 ||
+ weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
{
auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager};
+ ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info());
+ const auto orgin_weights_data_type = weights_info->data_type();
+ weights_info->set_data_type(DataType::QASYMM8_SIGNED);
fc->configure(input_to_use, _weights, _biases, _output);
+ weights_info->set_data_type(orgin_weights_data_type);
return std::unique_ptr<arm_compute::IFunction>(fc);
}
else
@@ -74,6 +77,11 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
return std::unique_ptr<arm_compute::IFunction>(fc);
}
}
+ else
+ {
+ throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type");
+ }
+
}();
if (_needs_reshape)
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp
deleted file mode 100644
index ca5499dfc..000000000
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-namespace arm_compute
-{
-using namespace arm_compute::misc::shape_calculator;
-using namespace arm_compute::cl_gemm;
-
-namespace
-{
-inline bool is_gemm_reshaped(bool reshape_b_only_on_first_run, GPUTarget gpu_target)
-{
- return (get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) && (reshape_b_only_on_first_run);
-}
-} // namespace
-
-CLGEMMLowpMatrixMultiplyCoreEx::CLGEMMLowpMatrixMultiplyCoreEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _mm_midgard_kernel(), _mtx_a_reduction_kernel(),
- _mtx_b_reduction_kernel(), _vector_sum_col(), _vector_sum_row(), _a_offset(0), _b_offset(0),
- _reshape_b_only_on_first_run(false), _is_prepared(false)
-{
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::configure(const ICLTensor *a, const ICLTensor *b,
- const ICLTensor *c, ICLTensor *output,
- const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
- ARM_COMPUTE_UNUSED(c);
- ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCoreEx::validate(
- a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));
-
- _is_prepared = false;
- _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
- _a_offset = a->info()->quantization_info().uniform().offset;
- _b_offset = b->info()->quantization_info().uniform().offset;
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- // Set the target for the kernels
- _mm_midgard_kernel.set_target(gpu_target);
-
- // GEMMRHSMatrixInfo rhs_info;
- // GEMMLHSMatrixInfo lhs_info;
-
- // Arguments used by GEMMReshapeInfo
- // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m,
- // n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo
- // in order to know how the matrices have been reshaped
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d
- ? (a->info()->dimension(1) * a->info()->dimension(2))
- : a->info()->dimension(1);
- const unsigned int n = b->info()->dimension(0);
- const unsigned int k = a->info()->dimension(0);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
-
- const ICLTensor *matrix_b = b;
- // Configure matrix multiply kernel
- _mm_midgard_kernel.configure(
- a, matrix_b, output,
- GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
-}
-
-Status CLGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output,
- const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
- ARM_COMPUTE_UNUSED(c);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(),
- "Matrix A already reshaped is not supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(),
- "Matrix B already reshaped is not supported");
-
- const ITensorInfo *matrix_a_info = a;
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m =
- reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
-
- bool reshape_matrix_b = is_gemm_reshaped(gemm_info.reshape_b_only_on_first_run(), gpu_target);
-
- const GEMMReshapeInfo reshape_info =
- GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);
-
- TensorInfo weights_info(*b);
- const ITensorInfo *matrix_b_info = &weights_info;
- if (reshape_matrix_b)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(false,
- "CLGEMMLowpMatrixMultiplyCoreEx does not support reshape_b");
- }
-
- // Validate matrix multiply
- ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernelEx::validate(
- matrix_a_info, matrix_b_info, output, reshape_info));
-
- return Status{};
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Run matrix multiply
- CLScheduler::get().enqueue(_mm_midgard_kernel, false);
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::prepare()
-{
- if (!_is_prepared)
- {
- _is_prepared = true;
- }
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
index f594d7a2e..e0b833b04 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
@@ -48,7 +48,7 @@ using namespace arm_compute;
void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output,
int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<CLGatherExKernel>();
+ auto k = support::cpp14::make_unique<CLGatherExKernel>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
index 27ed8e828..65b89a389 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
@@ -47,7 +47,7 @@ using namespace arm_compute;
void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
- auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>();
+ auto k = support::cpp14::make_unique<CLHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
index 80393e8d1..5a7e40839 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
@@ -50,7 +50,7 @@ CLInstanceNormalizationLayerEx::CLInstanceNormalizationLayerEx() {}
void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
ICLTensor *gamma, ICLTensor *beta, float epsilon)
{
- auto k = arm_compute::support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
+ auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
k->configure(input, output, gamma, beta, epsilon);
_kernel = std::move(k);
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
deleted file mode 100644
index fbb15ab1d..000000000
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLPReLU.h"
-
-#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>();
- k->configure(input, alpha, output);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp
deleted file mode 100644
index 6049b7e70..000000000
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLRNNLayerEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-CLRNNLayerEx::CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(),
- _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(),
- _gemm_output(), _add_output(), _is_prepared(false)
-{
-}
-
-Status CLRNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info)
-{
- const int idx_width = 0;
- const int idx_height = 1;
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state,
- output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) !=
- recurrent_weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) !=
- recurrent_weights->dimension(1));
- ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1);
- ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
- hidden_state->tensor_shape());
-
- auto shape_info =
- TensorInfo(compute_rnn_shape(recurrent_weights, hidden_state->dimension(idx_height)), 1,
- input->data_type());
-
- ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f));
- ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate(
- ArithmeticOperation::ADD, &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE));
- ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info));
-
- return Status{};
-}
-
-void CLRNNLayerEx::configure(const ICLTensor *input, const ICLTensor *weights,
- const ICLTensor *recurrent_weights, const ICLTensor *bias,
- ICLTensor *hidden_state, ICLTensor *output, ActivationLayerInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
- ARM_COMPUTE_ERROR_THROW_ON(CLRNNLayerEx::validate(input->info(), weights->info(),
- recurrent_weights->info(), bias->info(),
- hidden_state->info(), output->info(), info));
-
- const int idx_height = 1;
- TensorShape shape =
- compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
-
- _is_prepared = false;
-
- _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
-
- // Manage intermediate buffers and configure
- _memory_group.manage(&_fully_connected_out);
- _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
-
- _memory_group.manage(&_gemm_output);
- _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
-
- _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _memory_group.manage(&_add_output);
-
- _add_kernel.configure(ArithmeticOperation::ADD, &_fully_connected_out, &_gemm_output,
- &_add_output, ConvertPolicy::SATURATE);
-
- _fully_connected_out.allocator()->allocate();
- _gemm_output.allocator()->allocate();
-
- _activation_kernel.configure(&_add_output, hidden_state, info);
- _add_output.allocator()->allocate();
-
- _copy_kernel.configure(hidden_state, output);
-}
-
-void CLRNNLayerEx::run()
-{
- prepare();
-
- _memory_group.acquire();
-
- _fully_connected_kernel.run();
- _gemm_state_f.run();
- CLScheduler::get().enqueue(_add_kernel);
- CLScheduler::get().enqueue(_activation_kernel);
-
- // copy hidden out to output
- CLScheduler::get().enqueue(_copy_kernel);
-
- _memory_group.release();
-}
-
-void CLRNNLayerEx::prepare()
-{
- if (!_is_prepared)
- {
- _fully_connected_kernel.prepare();
- _gemm_state_f.prepare();
-
- _is_prepared = true;
- }
-}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
index 8ce2d746c..a41e6db60 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
@@ -60,8 +60,7 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
// Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+ auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
// Create intermediate tensor info
TensorShape shape{input->tensor_shape()};
@@ -119,9 +118,8 @@ void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
const size_t num_of_kernels = axis.size();
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduce_kernels =
- arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
+ _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
// Set a vector that is ordered ICLTensors sequentially.
std::vector<ICLTensor *> tensors;
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
deleted file mode 100644
index 7d7b2264b..000000000
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h"
-
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
-
-using namespace arm_compute;
-
-void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
index e61746ef2..3215d01a7 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
@@ -15,7 +15,7 @@
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,218 +37,124 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"
-#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
-#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/UtilsEx.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include <cmath>
#include <memory>
#include <tuple>
using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;
-CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _scale_f(),
- _conv_f(),
- _flip_weights(),
- _scaled_output(),
- _original_weights(nullptr),
- _weights_flipped(),
- _is_prepared(false)
+CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_manager(std::move(memory_manager)), _function()
+{
+}
+
+void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const PadStrideInfo &deconv_info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
{
+ configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info,
+ invalid_right, invalid_bottom, weights_info);
+}
+
+void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, ICLTensor *input,
+ ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ switch (CLTransposeConvLayer::get_deconvolution_method(input->info(), weights->info(), nullptr,
+ output->info(), deconv_info, invalid_right,
+ invalid_bottom, weights_info))
+ {
+ case DeconvolutionMethod::DIRECT:
+ {
+ auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>();
+ f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right,
+ invalid_bottom, weights_info);
+ _function = std::move(f);
+ break;
+ }
+ case DeconvolutionMethod::GEMM:
+ {
+ auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
+ f->configure(compile_context, input, weights, bias, output, deconv_info);
+ _function = std::move(f);
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported.");
+ break;
+ }
}
Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
const ITensorInfo *bias, ITensorInfo *output,
- const PadStrideInfo &info, unsigned int invalid_right,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
-
- const DataLayout data_layout = input->data_layout();
-
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
-
- const unsigned int kernel_x = weights->dimension(idx_w);
- const unsigned int kernel_y = weights->dimension(idx_h);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_right > kernel_x - 1,
- "invalid_right must be smaller than kernel_x");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_bottom > kernel_y - 1,
- "inner_border_top must be smaller than kernel_y");
-
- // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were added.
- auto out_dims = transposeconv_output_dimensions(
- input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
- weights->dimension(idx_h), info, invalid_right, invalid_bottom);
-
- const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
-
- if (bias != nullptr)
+ switch (CLTransposeConvLayer::get_deconvolution_method(
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
{
- if (is_data_type_quantized_asymmetric(input->data_type()))
+ case DeconvolutionMethod::DIRECT:
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ // Validate direct convolution layer
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate(
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
+ break;
}
- else
+ case DeconvolutionMethod::GEMM:
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ // Validate gemm-based convolution layer
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
+ break;
}
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias);
+ default:
+ ARM_COMPUTE_ERROR("Not supported.");
+ break;
}
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w],
- "Output's width is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h],
- "Output's height is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c],
- "Output's depth is invalid.");
-
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
- TensorInfo scale_out_info(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(scale_out_shape)
- .set_data_layout(data_layout));
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLTransposeConvLayerUpsample::validate(input, &scale_out_info, BorderSize(0, 0), info));
- ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, weights_info));
-
return Status{};
}
-void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
- ICLTensor *output, const PadStrideInfo &info,
- unsigned int invalid_right, unsigned int invalid_bottom,
- const WeightsInfo &weights_info)
+DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method(
+ const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
+ ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
+ ARM_COMPUTE_UNUSED(output, bias, weights_info);
- const DataLayout data_layout = input->info()->data_layout();
+ const DataLayout data_layout = input->data_layout();
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- _original_weights = weights;
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped);
-
- // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were
- // added.
- auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(idx_w), input->info()->dimension(idx_h),
- weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
- invalid_bottom);
-
- const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(
- *output->info(),
- input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
-
- _is_prepared = weights_info.retain_internal_weights();
-
- _memory_group.manage(&_scaled_output);
-
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
- // to match output shape
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
- input->info()->quantization_info());
- scale_out_info.set_data_layout(data_layout);
- _scaled_output.allocator()->init(scale_out_info);
-
- // configure scale function
- const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
- DimensionRoundingType::FLOOR);
- _scale_f.configure(input, &_scaled_output, BorderSize(0, 0), upsample_info);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, weights_info);
- _scaled_output.allocator()->allocate();
+ if (weights->dimension(idx_w) != deconv_info.stride().first ||
+ weights->dimension(idx_h) != deconv_info.stride().second || invalid_right != 0 ||
+ invalid_bottom != 0)
+ {
+ return DeconvolutionMethod::DIRECT;
+ }
+
+ return DeconvolutionMethod::GEMM;
}
void CLTransposeConvLayer::run()
{
prepare();
-
- _memory_group.acquire();
-
- _scale_f.run();
- _conv_f.run();
-
- _memory_group.release();
+ _function->run();
}
-void CLTransposeConvLayer::prepare()
-{
- if (!_is_prepared)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- // Run weights flipping and mark original weights tensor as unused
- _weights_flipped.allocator()->allocate();
- _weights_flipped.map(true);
- _original_weights->map(CLScheduler::get().queue(), true);
- CPPScheduler::get().schedule(&_flip_weights, Window::DimZ);
- _weights_flipped.unmap();
- _original_weights->unmap(CLScheduler::get().queue());
- _original_weights->mark_as_unused();
-
- // Prepare convolution
- _conv_f.prepare();
-
- if (!_weights_flipped.is_used())
- {
- _weights_flipped.allocator()->free();
- }
-
- _is_prepared = true;
- }
-}
+void CLTransposeConvLayer::prepare() { _function->prepare(); }
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
deleted file mode 100644
index 07feb5a64..000000000
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-#include <cmath>
-#include <memory>
-#include <tuple>
-
-using namespace arm_compute;
-
-CLTransposeConvLayerUpsample::CLTransposeConvLayerUpsample() // NOLINT
- : _upsample(),
- _output(nullptr)
-{
-}
-
-Status CLTransposeConvLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- return CLTransposeConvLayerUpsampleKernel::validate(input, output, inner_border, info);
-}
-
-void CLTransposeConvLayerUpsample::configure(ICLTensor *input, ICLTensor *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- _output = output;
- _upsample.configure(input, _output, inner_border, info);
-}
-
-void CLTransposeConvLayerUpsample::run()
-{
- _output->map(CLScheduler::get().queue(), true);
- if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
- {
- const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset;
- std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
- }
- else
- {
- memset(_output->buffer(), 0, _output->info()->total_size());
- }
- _output->unmap(CLScheduler::get().queue());
-
- CLScheduler::get().enqueue(_upsample, false);
-}