diff options
Diffstat (limited to 'compute/ARMComputeEx/arm_compute/runtime')
33 files changed, 790 insertions, 343 deletions
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h index cfbd13436..664b8b3b1 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h @@ -16,14 +16,19 @@ #ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__ #define __ARM_COMPUTE_CLFUNCTIONSEX_H__ +#include <arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h> #include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h> +#include <arm_compute/runtime/CL/functions/CLCastBool.h> #include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h> #include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h> #include <arm_compute/runtime/CL/functions/CLGatherEx.h> #include <arm_compute/runtime/CL/functions/CLHashtableLookup.h> #include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h> #include <arm_compute/runtime/CL/functions/CLNeg.h> +#include <arm_compute/runtime/CL/functions/CLOneHot.h> +#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h> #include <arm_compute/runtime/CL/functions/CLReduceOperation.h> +#include <arm_compute/runtime/CL/functions/CLSplitVEx.h> #include <arm_compute/runtime/CL/functions/CLTopKV2.h> #include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h> diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h new file mode 100644 index 000000000..05bcc4075 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ +#define __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ + +#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h" + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" + +namespace arm_compute +{ +class ITensorInfo; +class ICLTensor; + +/** Function to calculate the index of the minimum or maximum values in a + * tensor based on an axis. + * + * @note The default data type for an uninitialized output tensor is + * signed 32-bit integer (S32). It is the user's responsibility to check + * that the results do not overflow because the indices are computed + * in unsigned 32-bit (U32). + */ +class CLArgMinMaxLayerEx : public IFunction +{ +public: + /** Default Constructor. + * + * @param[in] memory_manager (Optional) Memory manager. + */ + CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Set the input and output tensors. + * + * @param[in] input Input source tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] axis Axis to find max/min index. + * @param[out] output Output source tensor. Data types supported: U32/S32. + * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX, + * ARG_IDX_MIN + */ + void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLArgMinMaxLayerEx + * + * @param[in] input Input source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] axis Axis to find max/min index. + * @param[in] output Output source tensor info. Data types supported: U32/S32. + * @param[in] op Reduction operation to perform. 
Operations supported: ARG_IDX_MAX, + * ARG_IDX_MIN + * + * @return a status + */ + static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output, + const ReductionOperation &op); + + // Inherited methods overridden: + void run() override; + +private: + MemoryGroup _memory_group; + std::vector<CLTensor> _results_vector; + CLTensor _not_reshaped_output; + std::vector<CLArgMinMaxLayerKernelEx> _reduction_kernels_vector; + CLReshapeLayer _reshape_kernel; + unsigned int _num_of_stages; + unsigned int _reduction_axis; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h index 88a9b00ec..fc4322798 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h @@ -43,6 +43,7 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/core/TypesEx.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" namespace arm_compute { diff --git a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h index 7930e4e20..854ddce52 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,31 +38,34 @@ * SOFTWARE. 
*/ -#ifndef __ARM_COMPUTE_CPPONEHOT_EX_H__ -#define __ARM_COMPUTE_CPPONEHOT_EX_H__ +/** + * @file CLCastBool.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLCastBool class + */ + +#ifndef ARM_COMPUTE_CLCASTBOOL_H +#define ARM_COMPUTE_CLCASTBOOL_H -#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" -#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { -class ITensor; +class ICLTensor; -/** Basic function to run @ref CPPOneHot */ -class CPPOneHotEx : public ICPPSimpleFunction +/** + * @brief Class to run @ref CLCastBoolKernel. + * This converts the boolean input tensor to the output tensor's type. + */ +class CLCastBool : public ICLSimpleFunction { public: - /** Configure the one_hot function - * - * @param[in] indices A tensor for indices. Data types supported: S32 - * @param[in] depth A tensor for depth. Data types supported: S32 - * @param[in] on_value A tensor for on_value. Data types supported: F32 - * @param[in] off_value A tensor for off_value. Data types supported: F32 - * @param[out] output A tensor for computed value of one hot operator - * @param[in] axis An int value for axis + /** + * @brief Initialise the kernel's input and output + * @param[in] input Input tensor. Data types supported: U8 + * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/F16/F32. 
*/ - void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, - const ITensor *off_value, ITensor *output, const int axis); + void configure(ICLTensor *input, ICLTensor *output); }; -} -#endif /* __ARM_COMPUTE_CPPONEHOT_EX_H__ */ +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCASTBOOL_H */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h index 409eaf593..026209f69 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h @@ -106,22 +106,24 @@ public: CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default; /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. - * Data type supported: Should match @p input data type, except for - * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[out] output Output tensor. The output has the same number of dimensions as the - * @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this - * is decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, + * except for input of QASYMM8 and QASYMM8_SIGNED type + * where biases should be of S32 type + * @param[out] output Output tensor. + * The output has the same number of dimensions as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for + * @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with + * @ref CLWeightsReshapeKernel. * */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, @@ -130,23 +132,24 @@ public: /** Set the input, weights, biases and output tensors. * * @param[in] compile_context The compile context to be used. - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and - * an optional 4th dimension for batch of inputs. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. 
Data - * type supported: Same as @p input. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. * @param[in] bias (Optional) The biases have one dimension. * Data type supported: Should match @p input data type, except for - * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * input of QASYMM8 and QASYMM8_SIGNED type + * where biases should be of S32 type * @param[out] output Output tensor. The output has the same number of dimensions as - * the @p input. + * the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, - * this is decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for + * @ref CLConvolutionLayer, specifies if the weights tensor has + * been reshaped with @ref CLWeightsReshapeKernel. * */ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, @@ -154,24 +157,26 @@ public: unsigned int invalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLDirectTransposeConvLayer + * CLDirectTransposeConvLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. 
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. - * Data type supported: Should match @p input data type, except for input - * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[in] output Output tensor info. The output has the same number of dimensions as the - * @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, + * except for input of QASYMM8 and QASYMM8_SIGNED type + * where biases should be of S32 type + * @param[in] output Output tensor info. The output has the same number of dimensions + * as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped + * with @ref CLWeightsReshapeKernel. * * @return a status */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h index fbee7e40e..b0149cb09 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h @@ -73,5 +73,5 @@ public: */ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h index f3266f688..c75ae9a50 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h @@ -43,14 +43,14 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h" #include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h" #include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" -#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" namespace arm_compute { @@ -182,5 +182,5 @@ private: bool _is_prepared; const 
ICLTensor *_original_weights; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h index e65a646dc..c08da526a 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h @@ -43,16 +43,14 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" -#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" namespace arm_compute { @@ -132,9 +130,6 @@ private: * transpose_weights is set to true ) (called once) * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized * asymmetric) - * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref - * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is - * not equal to nullptr) * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. */ @@ -157,40 +152,36 @@ public: * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. * @param[in] weights Weights tensor. The weights must be 2 dimensional. 
* If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. + * weights will have as many rows as the product of the first 3 input's dimensions. If it is + * called after another FullyConnected Layer, the (transposed) weights will have as many rows as + * the input's first dimension. Data type supported: Same as @p input. * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix * multiplication between: * - The output of im2col on the input and the (transposed) 2D weights, if the * function is called after a Convolution Layer * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. + * called after another FullyConnected Layer. Data type supported: Same as @p input. * @param[in] fc_info (Optional) Fully connected layer additional info */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLFullyConnectedLayerEx + * CLFullyConnectedLayer * * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. * @param[in] weights Weights tensor info. The weights must be 2 dimensional. * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. 
- * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. + * weights will have as many rows as the product of the first 3 input's dimensions. If it is + * called after another FullyConnected Layer, the (transposed) weights will have as many rows as + * the input's first dimension. Data type supported: Same as @p input. * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input. * @param[out] output Destination tensor info. Its shape should be equal to the output of a * matrix multiplication between: * - The output of im2col on the input and the (transposed) 2D weights, if the * function is called after a Convolution Layer * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. + * called after another FullyConnected Layer. Data type supported: Same as @p input. 
* @param[in] fc_info (Optional) Fully connected layer additional info * * @return a status @@ -216,7 +207,7 @@ private: CLConvertFullyConnectedWeights _convert_weights; weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed; weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged - _reshape_weights_managed_function; + _reshape_weights_managed_function; CLFlattenLayer _flatten_layer; CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function; CLGEMM _mm_gemm; diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h index 289ab167f..bdb168664 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h @@ -43,8 +43,8 @@ public: public: CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr) - : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, - _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false) + : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, + _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false) { // DO NOTHING } diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h index b01ec4255..385eb0b2c 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h @@ -47,11 +47,14 @@ #ifndef __ARM_COMPUTE_CLGATHEREX_H__ #define __ARM_COMPUTE_CLGATHEREX_H__ +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class 
ICLTensor; +class ITensorInfo; /** * @brief Class to to run @ref CLGatherKernel. @@ -66,7 +69,7 @@ public: * @param[out] output The output tensor, Data types supported: same as @p input. * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 * @return N/A - */ + */ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); /** @@ -81,5 +84,5 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLGATHEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h index 6618f5aa4..5e172a4c7 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h @@ -78,5 +78,5 @@ public: void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *intput, ICLTensor *output, ICLTensor *hits); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h index 887e7aaa5..02ae6d719 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h @@ -41,11 +41,14 @@ #ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__ #define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__ +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to perform a Instance normalization. 
* diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h new file mode 100644 index 000000000..62a36f06d --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLONEHOT_H__ +#define __ARM_COMPUTE_CLONEHOT_H__ + +#include "arm_compute/core/CL/kernels/CLOneHotKernel.h" +#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; +/** Basic function to run @ref CLOneHotKernel */ +class CLOneHot : public IFunction +{ +public: + /** Constructor */ + CLOneHot(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLOneHot(const CLOneHot &) = delete; + /** Default move constructor */ + CLOneHot(CLOneHot &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLOneHot &operator=(const CLOneHot &) = delete; + /** Default move assignment operator */ + CLOneHot &operator=(CLOneHot &&) = default; + /** Initialise the kernel's inputs and outputs + * + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the + * following types: U32/S32 + * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: + * U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: + * Same as @p on_value + * @param[out] output Destination tensor. Data type supported: Same as @p on_value + * @param[in] depth The depth of the one hot dimension. + * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. 
+ * value must be in range [-indices.rank , indices.rank) + */ + void configure(const ICLTensor *indices, const ICLTensor *on_value, const ICLTensor *off_value, + ICLTensor *output, int depth, int axis = -1); + /** Initialise the kernel's inputs and outputs with off_value being constant + * + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the + * following types: U32/S32 + * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: + * U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Destination tensor. Data type supported: Same as @p on_value + * @param[in] off_value The PixelValue for off value. Data type supported: Same as @p on_value + * @param[in] depth The depth of the one hot dimension. + * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. + * value must be in range [-indices.rank , indices.rank) + */ + void configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output, + PixelValue off_value, int depth, int axis = -1); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLOneHotKernel + * + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the + * following types: U32/S32 + * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: + * U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: + * Same as @p on_value + * @param[in] output Destination tensor. Data type supported: Same as @p on_value + * @param[in] depth The depth of the one hot dimension. + * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. 
+ * value must be in range [-indices.rank , indices.rank) + * + * @return a status + */ + static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value, + const ITensorInfo *off_value, const ITensorInfo *output, int depth, + int axis = -1); + + // Inherited methods overridden: + void run() override; + +private: + CLMemsetKernel _memset_kernel; /**< Memset kernel */ + CLOneHotKernel _onehot_kernel; /**< OneHot kernel */ + bool _has_to_memset; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLONEHOT_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h new file mode 100644 index 000000000..ee1879aaa --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018-2020 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPADLAYEREX_H +#define ARM_COMPUTE_CLPADLAYEREX_H + +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h" +#include "src/core/gpu/cl/kernels/ClCopyKernel.h" +// #include "arm_compute/runtime/CL/functions/CLCopy.h" +#include <memory> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to pad a tensor. 
This function calls the following OpenCL functions/kernels: + * + * -# @ref CLPadLayerKernelEx if there is padding to be added + * -# @ref CLCopyKernel otherwise + */ +class CLPadLayerEx : public IFunction +{ +public: + /** Default constructor */ + CLPadLayerEx(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerEx(const CLPadLayerEx &) = delete; + /** Default move constructor */ + CLPadLayerEx(CLPadLayerEx &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerEx &operator=(const CLPadLayerEx &) = delete; + /** Default move assignment operator */ + CLPadLayerEx &operator=(CLPadLayerEx &&) = default; + + /** Initialize the function + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair + * padding[i] specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p + * constant_value using CONSTANT, or reflect the input, either including the border values + * (SYMMETRIC) or not (REFLECT). + */ + void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, + PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The + * pair padding[i] specifies the front and the end padding in the i-th dimension. 
+ * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p + * constant_value using CONSTANT, or reflect the input, either including the border values + * (SYMMETRIC) or not (REFLECT). + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, + const PaddingList &padding, PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); + + /** Static function to check if given info will lead to a valid configuration of @ref + * CLPadLayerEx. + * + * @param[in] input Source tensor info. Data types supported: All. + * @param[in] output Output tensor info. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair + * padding[i] specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding + * @param[in] mode (Optional) Controls whether the padding should be filled with @p + * constant_value using CONSTANT, or reflect the input, either including the border values + * (SYMMETRIC) or not (REFLECT). 
+ */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const PaddingList &padding, PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); + + // Inherited methods overridden: + void run() override; + +private: + void configure_reflect_mode(ICLTensor *input, ICLTensor *output); + + std::unique_ptr<CLPadLayerKernelEx> _pad_kernel; + std::unique_ptr<opencl::kernels::ClCopyKernel> _copy_kernel; + bool _perform_pad; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPADLAYEREX_H */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h index 7dba84b12..45eb72bef 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h @@ -48,7 +48,7 @@ #define __ARM_COMPUTE_CLREDUCEOPERATION_H__ #include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" -#include "arm_compute/core/TypesEx.h" +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" @@ -82,7 +82,7 @@ public: * @return N/A */ void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis, - bool keep_dims, ReduceOperation op); + bool keep_dims, ReductionOperation op); /** * @brief Static function to check if given info will lead to a valid configuration of @ref @@ -96,7 +96,8 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const std::set<uint32_t> &axis, bool keep_dims, const ReduceOperation &op); + const std::set<uint32_t> &axis, bool keep_dims, + const ReductionOperation &op); /** * @brief Run the OpenCL kernel for this operation @@ -115,5 +116,5 @@ private: std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr}; 
CLReshapeLayer _reshape; }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h new file mode 100644 index 000000000..3023df3f0 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSPLITVEX__ +#define __ARM_COMPUTE_CLSPLITVEX__ + +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/CL/functions/CLSlice.h" +#include "arm_compute/core/Types.h" +#include <vector> +#include <memory> + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/runtime/CPP/functions/CPPSplit.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLSplitVKernel */ +class CLSplitVEx : public IFunction +{ +public: + /** Default constructor */ + CLSplitVEx(); + /** Configure the split CL kernel + * + * @param[in] input The input tensor to split. Data types supported: + * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] size_splits A 1-D tensor containing the number of tensor values per split + * @param[out] outputs A vector containing the output tensor. Data types supported: Same as @p + * input + * The output tensors should match the input tensor dimensions for all + * shape dimensions apart + * from the split dimension. 
+ * @param[in] split_dim Integer value representing the input tensor dimension along which to + * split + * @param[in] num_splits Number of splits + */ + void configure(const ICLTensor *input, const ICLTensor *size_splits, uint32_t split_dim, + const std::vector<ICLTensor *> &outputs, unsigned int num_splits); + + void run() override; + +private: + const ICLTensor *_input; + const ICLTensor *_size_splits; + std::vector<ICLTensor *> _outputs; + unsigned int _num_splits; + std::vector<CLSlice> _slice_functions; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLSPLITVEX__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h index e301a5152..f426a4d75 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h @@ -160,5 +160,5 @@ private: CLTopKV2Store _store_kernel; #endif }; -} +} // namespace arm_compute #endif // __ARM_COMPUTE_CLTOPK_V2_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h index 5fb102e47..5b27d362a 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h @@ -63,20 +63,22 @@ public: /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same - * as @p input. - * @param[out] output Output tensor. 
The output has the same number of dimensions as the - * @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this - * is described in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions + * as the @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for + * @ref CLConvolutionLayer, specifies if the weights tensor has + * been reshaped with @ref CLWeightsReshapeKernel. * */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, @@ -85,22 +87,22 @@ public: /** Set the input, weights, biases and output tensors. * * @param[in] compile_context The compile context to be used. - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and - * an optional 4th dimension for batch of inputs. 
Data types supported: - * QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: - * Same as @p input. - * @param[out] output Output tensor. The output has the same number of dimensions as - * the @p input. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions + * as the @p input. * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, - * this is described in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for + * @ref CLConvolutionLayer, specifies if the weights tensor has + * been reshaped with @ref CLWeightsReshapeKernel. 
* */ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, @@ -108,22 +110,24 @@ public: unsigned int invalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLTransposeConvLayer + * CLTransposeConvLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as - * @p input. - * @param[in] output Output tensor info. The output has the same number of dimensions as the - * @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is - * described in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Same as @p input. + * @param[in] output Output tensor info. The output has the same number of dimensions + * as the @p input. 
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with + * @ref CLWeightsReshapeKernel. * * @return a status */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h index 3fad230f1..d0ddc2609 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h @@ -16,13 +16,13 @@ #ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__ #define __ARM_COMPUTE_NEFUNCTIONSEX_H__ -#include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h> -#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h> +#include <arm_compute/runtime/NEON/functions/NECastBool.h> #include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h> #include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h> #include <arm_compute/runtime/NEON/functions/NEGatherEx.h> #include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h> #include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h> +#include <arm_compute/runtime/NEON/functions/NEOneHot.h> #include <arm_compute/runtime/NEON/functions/NEReduceSum.h> #include <arm_compute/runtime/NEON/functions/NEReduceOperation.h> #include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h> diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h deleted file mode 100644 index 6156c84f8..000000000 --- 
a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ -#define __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to run @ref NEActivationLayerKernelEx - * - * @note The function simulates an activation layer with the specified activation function. - */ -class NEActivationLayerEx : public INESimpleFunctionNoBorder -{ -public: - /** Constructor - * - * @param[in] ctx Runtime context to be used by the function - */ - NEActivationLayerEx(IRuntimeContext *ctx = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerEx(const NEActivationLayerEx &) = delete; - /** Default move constructor */ - NEActivationLayerEx(NEActivationLayerEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerEx &operator=(const NEActivationLayerEx &) = delete; - /** Default move assignment operator */ - NEActivationLayerEx &operator=(NEActivationLayerEx &&) = default; - /** [NEActivationLayerEx snippet] **/ - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will - * be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this - * tensor will store the result - * of the activation function. Data types supported: - * QASYMM8/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] activation_info Activation layer parameters. 
- */ - void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); - /** [NEActivationLayerEx snippet] **/ - /** Static function to check if given info will lead to a valid configuration of @ref - * NEActivationLayerEx - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor - * will store the result - * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfo &act_info); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h index 026d30098..8d931f08d 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h @@ -41,8 +41,10 @@ #ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__ #define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__ +#include "arm_compute/core/Error.h" #include "arm_compute/core/TypesEx.h" #include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/ITensorInfo.h" namespace arm_compute { diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h index 1693922b7..dd62645ee 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,63 +37,41 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#ifndef __ARM_COMPUTE_NECASTBOOL_H__ +#define __ARM_COMPUTE_NECASTBOOL_H__ -#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__ -#define __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h" -#include "arm_compute/core/TypesEx.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; -/** Basic function to simulate a reduction operation. This function calls the following NEON - * kernels: - * - * -# @ref NEFillBorderKernel - * -# @ref NEReductionOperationKernelEx - * +/** + * @brief Class to run @ref INESimpleFunctionNoBorder. */ -class NEReductionOperationEx : public IFunction +class NECastBool : public INESimpleFunctionNoBorder { public: - /** Default constructor */ - NEReductionOperationEx(); - /** Set the input and output tensors. + /** Initialize the function's source, destination + * + * Valid conversions Input -> Output : * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. - * @param[in] op Reduction operation to perform. + * - U8 -> U8, S8, U16, S16, U32, S32, F32, F16 + * + * @param[in] input The input tensor to convert. 
Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. */ - void configure(ITensor *input, ITensor *output, unsigned int axis, ReduceOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEReductionOperationEx. + void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NECastBool * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p - * input. - * @param[in] axis Dimension along which to reduce. - * @param[in] op Reduction operation to perform. + * @param[in] input Source tensor info. Data types supported: U8. + * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, - ReduceOperation op); - - // Inherited methods overridden: - void run() override; - -private: - NEReductionOperationKernelEx _reduction_kernel; - NEFillBorderKernel _fill_border_kernel; - size_t _window_split; - int _reduction_axis; + static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; } // namespace arm_compute -#endif /* __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__ */ +#endif /*__ARM_COMPUTE_NECASTBOOL_H__*/ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h index 63f7714aa..82a789e86 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h @@ -48,12 +48,14 @@ #define __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" 
+#include "arm_compute/core/Error.h" #include <vector> namespace arm_compute { class ITensor; +class ITensorInfo; /** * @brief Class to perform EmbeddingLookup operation @@ -84,5 +86,5 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *lookups); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h index 56548a479..214592710 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h @@ -44,11 +44,11 @@ #include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/runtime/Tensor.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" namespace arm_compute { diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h index 8f98f220a..2bbb1fea1 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h @@ -43,16 +43,16 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" 
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" +#include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" +#include "src/core/NEON/kernels/NETransposeKernel.h" namespace arm_compute { @@ -79,11 +79,11 @@ public: /** Prevent instances of this class from being copied (As this class contains pointers) */ NEFullyConnectedLayerEx(const NEFullyConnectedLayerEx &) = delete; /** Default move constructor */ - NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = default; + NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEFullyConnectedLayerEx &operator=(const NEFullyConnectedLayerEx &) = delete; /** Default move assignment operator */ - NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = default; + NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = delete; /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. 
@@ -141,7 +141,7 @@ private: void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output); MemoryGroup _memory_group; - NEFlattenLayerKernel _flatten_kernel; + NEFlattenLayer _flatten_kernel; NEConvertFullyConnectedWeights _convert_weights; NEFullyConnectedLayerReshapeWeights _reshape_weights_function; NEGEMM _mm_gemm; diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h index 18cb61bf9..e34b4dcb0 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h @@ -43,8 +43,8 @@ public: public: NEFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr) - : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr), - _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false) + : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr), + _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false) { // DO NOTHING } diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h index 155a1b837..6944c77f6 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h @@ -47,6 +47,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEGatherKernelEx */ class NEGatherEx : public INESimpleFunctionNoBorder diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h index 521a05ad9..f6fda60a9 100644 --- 
a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h @@ -48,12 +48,14 @@ #define __ARM_COMPUTE_NEHASHTABLELOOKUP_H__ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/core/Error.h" #include <vector> namespace arm_compute { class ITensor; +class ITensorInfo; /** * @brief Class to perform HashtableLookup operation @@ -96,5 +98,5 @@ public: const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *hits); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEHASHTABLELOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h index 18e813923..0ee967698 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h @@ -54,6 +54,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to perform a Instance normalization. * @@ -112,5 +113,5 @@ private: Tensor _permuted_input; Tensor _permuted_output; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h new file mode 100644 index 000000000..668f024a1 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEONEHOT_H__ +#define __ARM_COMPUTE_NEONEHOT_H__ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +namespace arm_compute +{ +// Forward declarations +class ITensor; +class ITensorInfo; + +/** Basic function to run @ref NEOneHotKernel */ +class NEOneHot : public INESimpleFunctionNoBorder +{ +public: + /** Initialise the kernel's inputs and outputs + * + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the + * following types: U32/S32 + * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up + * to 3. Must be one of the following types: U32/S32 + * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: + * U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: + * Same as @p on_value + * @param[out] output Destination tensor. Data type supported: Same as @p on_value + * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. + * The value must be in range [-indices.rank , indices.rank) + */ + void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, + const ITensor *off_value, ITensor *output, int axis = -1); + /** Static function to check if given info will lead to a valid configuration of @ref + * NEOneHotKernel + * + * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. + * Must be one of the following types: U32/S32 + * @param[in] depth The tensor info for depth of the one hot dimension. + * Supported tensor rank: up to 3. + * Must be one of the following types: U32/S32 + * @param[in] on_value On value tensor info. Supported tensor rank: only 1. + * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. 
+ * Data type supported: Same as @p on_value + * @param[out] output Destination tensor info. Data type supported: Same as @p on_value + * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. + * The value must be in range [-indices.rank , indices.rank) + * + * @return a status + */ + static Status validate(const ITensorInfo *indices, const ITensorInfo *depth, + const ITensorInfo *on_value, const ITensorInfo *off_value, + const ITensorInfo *output, int axis = -1); +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEONEHOT_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h index 7f764b000..9858e6c09 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h @@ -43,10 +43,10 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/TypesEx.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h" +#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -71,7 +71,7 @@ public: * @param[in] op Reduce operation to perform. 
*/ void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, ITensor *output, - ReduceOperation op); + ReductionOperation op); /** Static function to check if given info will lead to a valid configuration of @ref * NEReduceOperation @@ -85,14 +85,14 @@ public: * @return A status */ static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output, ReduceOperation op); + bool keep_dims, const ITensorInfo *output, ReductionOperation op); // Inherited methods overridden: void run() override; private: MemoryGroup _memory_group; - std::vector<NEReductionOperationEx> _reduction_kernels; + std::vector<NEReductionOperation> _reduction_kernels; std::vector<Tensor> _reduced_outs; NEReshapeLayer _reshape; unsigned int _reduction_ops; diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h index 48b416923..f34a8f8af 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h @@ -43,11 +43,13 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" +#include "arm_compute/runtime/Tensor.h" namespace arm_compute { diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h index 24ff5dac9..f82579a45 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h +++ 
b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h @@ -102,47 +102,50 @@ public: /** Prevent instances of this class from being copied (As this class contains pointers) */ NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete; /** Allow instances of this class to be moved */ - NETransposeConvLayer(NETransposeConvLayer &&) = default; + NETransposeConvLayer(NETransposeConvLayer &&) = delete; /** Allow instances of this class to be moved */ - NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default; + NETransposeConvLayer &operator=(NETransposeConvLayer &&) = delete; /** Default destructor */ virtual ~NETransposeConvLayer() = default; /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type - * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 - * for F16 input. - * @param[out] output Output tensor. The output has the same number of dimensions as the @p - * input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. 
+ * Data type supported: Same as @p input. + * @param[in] bias Optional, ignored if NULL. The biases have one dimension. + * Data types supported: S32 for QASYMM8 and + * QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. + * @param[out] output Output tensor. The output has the same number of dimensions as + * the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. * */ void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom); /** Static function to check if given info will lead to a valid configuration of @ref - * NETransposeConvLayer + * NETransposeConvLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types - * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. - * @param[in] output Output tensor info. The output has the same number of dimensions as the @p - * input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] innvalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] input Input tensor info.
3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, + * F32 for F32 input, F16 for F16 input. + * @param[in] output Output tensor info. The output has the same number of dimensions as + * the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. * * @return a status */ @@ -168,5 +171,5 @@ private: PadStrideInfo _info; bool _is_prepared; }; -} // arm_compute +} // namespace arm_compute #endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */ |