diff options
Diffstat (limited to 'compute/ARMComputeEx/arm_compute/runtime/CL/functions')
14 files changed, 167 insertions, 33 deletions
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h index b1ee52bf9..05bcc4075 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h @@ -41,8 +41,9 @@ #define __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ #include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h" -#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" + #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -100,7 +101,7 @@ private: std::vector<CLTensor> _results_vector; CLTensor _not_reshaped_output; std::vector<CLArgMinMaxLayerKernelEx> _reduction_kernels_vector; - CLReshapeLayerKernel _reshape_kernel; + CLReshapeLayer _reshape_kernel; unsigned int _num_of_stages; unsigned int _reduction_axis; }; diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h index 88a9b00ec..fc4322798 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h @@ -43,6 +43,7 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/core/TypesEx.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" namespace arm_compute { diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h index d6150684a..854ddce52 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h @@ -67,5 +67,5 @@ public: */ void configure(ICLTensor *input, ICLTensor *output); }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_CLCASTBOOL_H */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h index fbee7e40e..b0149cb09 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h @@ -73,5 +73,5 @@ public: */ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h index f3266f688..c75ae9a50 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h @@ -43,14 +43,14 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h" #include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h" #include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" -#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" namespace arm_compute { @@ -182,5 +182,5 @@ private: bool _is_prepared; const ICLTensor *_original_weights; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h index f27e9913e..c08da526a 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h @@ -43,16 +43,14 @@ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" -#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "src/core/CL/kernels/CLTransposeKernel.h" namespace arm_compute { @@ -132,9 +130,6 @@ private: * transpose_weights is set to true ) (called once) * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized * asymmetric) - * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref - * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is - * not equal to nullptr) * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. */ @@ -157,40 +152,36 @@ public: * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. * @param[in] weights Weights tensor. The weights must be 2 dimensional. * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. + * weights will have as many rows as the product of the first 3 input's dimensions. If it is + * called after another FullyConnected Layer, the (transposed) weights will have as many rows as + * the input's first dimension. Data type supported: Same as @p input. * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix * multiplication between: * - The output of im2col on the input and the (transposed) 2D weights, if the * function is called after a Convolution Layer * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. + * called after another FullyConnected Layer. Data type supported: Same as @p input. * @param[in] fc_info (Optional) Fully connected layer additional info */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLFullyConnectedLayerEx + * CLFullyConnectedLayer * * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. * @param[in] weights Weights tensor info. The weights must be 2 dimensional. * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. + * weights will have as many rows as the product of the first 3 input's dimensions. If it is + * called after another FullyConnected Layer, the (transposed) weights will have as many rows as + * the input's first dimension. Data type supported: Same as @p input. * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input. * @param[out] output Destination tensor info. Its shape should be equal to the output of a * matrix multiplication between: * - The output of im2col on the input and the (transposed) 2D weights, if the * function is called after a Convolution Layer * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. + * called after another FullyConnected Layer. Data type supported: Same as @p input. * @param[in] fc_info (Optional) Fully connected layer additional info * * @return a status diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h index 167554c9e..385eb0b2c 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h @@ -47,11 +47,14 @@ #ifndef __ARM_COMPUTE_CLGATHEREX_H__ #define __ARM_COMPUTE_CLGATHEREX_H__ +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** * @brief Class to to run @ref CLGatherKernel. @@ -81,5 +84,5 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLGATHEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h index 6618f5aa4..5e172a4c7 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h @@ -78,5 +78,5 @@ public: void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *intput, ICLTensor *output, ICLTensor *hits); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h index 887e7aaa5..02ae6d719 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h @@ -41,11 +41,14 @@ #ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__ #define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__ +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" namespace arm_compute { +class CLCompileContext; class ICLTensor; +class ITensorInfo; /** Basic function to perform a Instance normalization. * diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h index 2bbfca821..62a36f06d 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h @@ -39,9 +39,11 @@ */ #ifndef __ARM_COMPUTE_CLONEHOT_H__ #define __ARM_COMPUTE_CLONEHOT_H__ -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" + #include "arm_compute/core/CL/kernels/CLOneHotKernel.h" +#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/runtime/IFunction.h" + namespace arm_compute { class ICLTensor; diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h new file mode 100644 index 000000000..ee1879aaa --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2018-2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLPADLAYEREX_H +#define ARM_COMPUTE_CLPADLAYEREX_H + +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h" +#include "src/core/gpu/cl/kernels/ClCopyKernel.h" +// #include "arm_compute/runtime/CL/functions/CLCopy.h" +#include <memory> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels: + * + * -# @ref CLPadLayerKernelEx if there is padding to be added + * -# @ref CLCopyKernel otherwise + */ +class CLPadLayerEx : public IFunction +{ +public: + /** Default constructor */ + CLPadLayerEx(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerEx(const CLPadLayerEx &) = delete; + /** Default move constructor */ + CLPadLayerEx(CLPadLayerEx &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerEx &operator=(const CLPadLayerEx &) = delete; + /** Default move assignment operator */ + CLPadLayerEx &operator=(CLPadLayerEx &&) = default; + + /** Initialize the function + * + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair + * padding[i] specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p + * constant_value using CONSTANT, or reflect the input, either including the border values + * (SYMMETRIC) or not (REFLECT). + */ + void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, + PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); + /** Initialize the function + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: All. + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The + * pair padding[i] specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding. + * @param[in] mode (Optional) Controls whether the padding should be filled with @p + * constant_value using CONSTANT, or reflect the input, either including the border values + * (SYMMETRIC) or not (REFLECT). + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, + const PaddingList &padding, PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); + + /** Static function to check if given info will lead to a valid configuration of @ref + * CLPadLayerEx. + * + * @param[in] input Source tensor info. Data types supported: All. + * @param[in] output Output tensor info. Data type supported: same as @p input + * @param[in] padding The padding for each spatial dimension of the input tensor. The pair + * padding[i] specifies the front and the end padding in the i-th dimension. + * @param[in] constant_value (Optional) Constant value to be used for the padding + * @param[in] mode (Optional) Controls whether the padding should be filled with @p + * constant_value using CONSTANT, or reflect the input, either including the border values + * (SYMMETRIC) or not (REFLECT). + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const PaddingList &padding, PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); + + // Inherited methods overridden: + void run() override; + +private: + void configure_reflect_mode(ICLTensor *input, ICLTensor *output); + + std::unique_ptr<CLPadLayerKernelEx> _pad_kernel; + std::unique_ptr<opencl::kernels::ClCopyKernel> _copy_kernel; + bool _perform_pad; +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_CLPADLAYEREX_H */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h index bb852e404..45eb72bef 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h @@ -116,5 +116,5 @@ private: std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr}; CLReshapeLayer _reshape; }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h index bb741d98d..3023df3f0 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h @@ -46,6 +46,9 @@ #include <vector> #include <memory> +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/runtime/CPP/functions/CPPSplit.h" + namespace arm_compute { class ICLTensor; @@ -82,5 +85,5 @@ private: unsigned int _num_splits; std::vector<CLSlice> _slice_functions; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_CLSPLITVEX__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h index e301a5152..f426a4d75 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h @@ -160,5 +160,5 @@ private: CLTopKV2Store _store_kernel; #endif }; -} +} // namespace arm_compute #endif // __ARM_COMPUTE_CLTOPK_V2_H__ |