diff options
Diffstat (limited to 'libs/ARMComputeEx/arm_compute/core/CL/kernels')
26 files changed, 2054 insertions, 286 deletions
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h new file mode 100644 index 000000000..080cc47ef --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__ +#define __ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the activation layer kernel. 
*/ +class CLActivationLayerExKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLActivationLayerExKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLActivationLayerExKernel(const CLActivationLayerExKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLActivationLayerExKernel &operator=(const CLActivationLayerExKernel &) = delete; + /** Allow instances of this class to be moved */ + CLActivationLayerExKernel(CLActivationLayerExKernel &&) = default; + /** Allow instances of this class to be moved */ + CLActivationLayerExKernel &operator=(CLActivationLayerExKernel &&) = default; + /** Default destructor */ + ~CLActivationLayerExKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will + * store the result + * of the activation function. Data types supported: + * QASYMM8/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + */ + void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfoEx act_info); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLActivationLayerExKernel + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor + * will store the result + * of the activation function. Data types supported: QASYMM8/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ActivationLayerInfoEx &act_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h new file mode 100644 index 000000000..b91a26159 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLArgMinMaxKernel.h + * @brief This file defines CLArgMinMaxKernel + * @ingroup COM_AI_RUNTIME + */ + +#ifndef __ARM_COMPUTE_CLARG_MIN_MAX_KERNEL_H__ +#define __ARM_COMPUTE_CLARG_MIN_MAX_KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to define interface for the argminmax max kernel. + */ +class CLArgMinMaxKernel : public ICLKernel +{ +public: + /** + * @brief Default constructor. 
+ */ + CLArgMinMaxKernel(); + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLArgMinMaxKernel to be copied + */ + CLArgMinMaxKernel(const CLArgMinMaxKernel &) = delete; + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLArgMinMaxKernel to be copied + * @return Reference of this instance + */ + CLArgMinMaxKernel &operator=(const CLArgMinMaxKernel &) = delete; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLArgMinMaxKernel to be moved + */ + CLArgMinMaxKernel(CLArgMinMaxKernel &&) = default; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLArgMinMaxKernel to be moved + * @return Reference of this instance + */ + CLArgMinMaxKernel &operator=(CLArgMinMaxKernel &&) = default; + /** + * @brief Initialise the kernel's input, output, axis and operation. + * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. + * @param[out] output The output tensor, Data types supported: same as @p input. + * @param[in] argminmax_axis Axis to argminmax + * @return N/A + */ + void configure(const ICLTensor *input, ICLTensor *output, const uint32_t argminmax_axis, + ArgOperation op); + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLArgMinMaxKernel + * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32. + * @param[in] output The output tensor info, Data types supported: same as @p input. 
+ * @param[in] argminmax_axis Axis to argminmax + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const uint32_t argminmax_axis, ArgOperation op); + + /** + * @brief Run CLArgMinMaxKernel op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + /** + * @brief Run CLArgMinMaxKernel op on CPU + * @param[in] queue cl::CommandQueue + * @return N/A + */ + void run_on_cpu(cl::CommandQueue &queue); + +private: + const ICLTensor *_input; + ICLTensor *_output; + uint32_t _argminmax_axis; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLARG_MIN_MAX_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h new file mode 100644 index 000000000..9a765f310 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__ +#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the arithmetic subtraction kernel (supports broadcasting) + * + * Arithmetic subtraction is computed by: + * @f[ output(x,y) = input1(x,y) - input2(x,y) @f] + */ +class CLArithmeticSubtractionExKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArithmeticSubtractionExKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticSubtractionExKernel(const CLArithmeticSubtractionExKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticSubtractionExKernel &operator=(const CLArithmeticSubtractionExKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArithmeticSubtractionExKernel(CLArithmeticSubtractionExKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArithmeticSubtractionExKernel &operator=(CLArithmeticSubtractionExKernel &&) = default; + /** Default destructor */ + ~CLArithmeticSubtractionExKernel() = default; + + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32. + * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), + * S16/F16/F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, + ConvertPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLArithmeticSubtractionExKernel + * + * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32. 
+ * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32. + * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), + * S16/F16/F32. + * @param[in] policy Policy to use to handle overflow. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, + const ITensorInfo *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h new file mode 100644 index 000000000..1387897c9 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__ +#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform BATCH_TO_SPACE_ND operation */ +class CLBatchToSpaceNDKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBatchToSpaceNDKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchToSpaceNDKernel(const CLBatchToSpaceNDKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchToSpaceNDKernel &operator=(const CLBatchToSpaceNDKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBatchToSpaceNDKernel(CLBatchToSpaceNDKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBatchToSpaceNDKernel &operator=(CLBatchToSpaceNDKernel &&) = default; + /** Default destructor */ + ~CLBatchToSpaceNDKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const int32_t *block_size); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h new file mode 100644 index 000000000..ab33d9d3a --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ +#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to return truth values of two input tensors for Binary Logical Op*/ +class CLBinaryLogicalOpKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBinaryLogicalOpKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + CLBinaryLogicalOpKernel(const CLBinaryLogicalOpKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBinaryLogicalOpKernel &operator=(const CLBinaryLogicalOpKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBinaryLogicalOpKernel(CLBinaryLogicalOpKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBinaryLogicalOpKernel &operator=(CLBinaryLogicalOpKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input1 Source tensor1. + * @param[in] input2 Source tensor2. + * @param[out] output Output tensor. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, + BinaryLogicalOperation op); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + + BorderSize border_size() const override; + +private: + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; +}; + +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h index 6bd33bf8f..4c2feb903 100644 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h @@ -14,6 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +/** + * @file CLCastKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLCastKernel class + */ + #ifndef __ARM_COMPUTE_CLCASTKERNEL_H__ #define __ARM_COMPUTE_CLCASTKERNEL_H__ @@ -23,30 +30,62 @@ namespace arm_compute { class ICLTensor; -/** OpenCL kernel to perform a cast operation */ +/** + * @brief Class to define OpenCL kernel for cast operation + */ class CLCastKernel : public ICLKernel { public: - /** Default constructor */ + /** + * @brief Construct CLCastKernel object + */ CLCastKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ CLCastKernel(const CLCastKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ CLCastKernel &operator=(const CLCastKernel &) = delete; - /** Allow instances of this class to be moved */ + + /** + * @brief Construct CLCastKernel object using default move constructor + * @param[in] CLCastKernel object to move + */ CLCastKernel(CLCastKernel &&) = default; - /** Allow instances of this class to be moved */ + + /** + * @brief Allow instances of this class to be moved + * @param[in] CLCastKernel object to move + */ CLCastKernel &operator=(CLCastKernel &&) = default; - /** Default destructor */ + + /** + * @brief Destruct this CLCastKernel object + */ ~CLCastKernel() = default; - /** Initialise the kernel's input and output. - * + + /** + * @brief Initialise the kernel's input and output. * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. 
+ * @return N/A */ void configure(const ICLTensor *input, ICLTensor *output); - // Inherited methods overridden: + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h new file mode 100644 index 000000000..f5f455993 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__ +#define __ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to check if values in both tensors are equal*/ +class CLComparisonOpKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLComparisonOpKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLComparisonOpKernel(const CLComparisonOpKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLComparisonOpKernel &operator=(const CLComparisonOpKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComparisonOpKernel(CLComparisonOpKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComparisonOpKernel &operator=(CLComparisonOpKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input1 Source tensor1. + * @param[in] input2 Source tensor2. + * @param[out] output Output tensor. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, + const ComparisonOperation &op); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + + BorderSize border_size() const override; + +private: + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h new file mode 100644 index 000000000..60ec7a82a --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform depthTospace operation */ +class CLDepthToSpaceKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthToSpaceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default; + /** Default destructor */ + ~CLDepthToSpaceKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h new file mode 100644 index 000000000..da075db69 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file CLEmbeddingLookupKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLEmbeddingLookupKernel class + */ + +#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ +#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** +* @brief Class to perform EmbeddingLookup operation with opencl kernel +*/ +class CLEmbeddingLookupKernel : public ICLKernel +{ +public: + /** + * @brief Construct a CLEmbeddingLookupKernel object + * */ + CLEmbeddingLookupKernel(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLEmbeddingLookupKernel(const CLEmbeddingLookupKernel &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLEmbeddingLookupKernel &operator=(const CLEmbeddingLookupKernel &) = delete; + + /** + * @brief Construct a CLEmbeddingLookupKernel object by using default move constructor + * @param[in] CLEmbeddingLookupKernel object to move + * */ + CLEmbeddingLookupKernel(CLEmbeddingLookupKernel &&) = default; + + /** + * @brief Move assignment operator + * @param[in] CLEmbeddingLookupKernel object to move + * */ + CLEmbeddingLookupKernel &operator=(CLEmbeddingLookupKernel &&) = default; + + /** + * @brief Destruct this object + * */ + ~CLEmbeddingLookupKernel() = default; + + /** + * @brief Set the input and output of the kernel + * @param[in] input Source tensor. + * Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] lookups Lookups are 1D tensor that values are indices into the first + * dimension of input. + * Data types supported: S32. 
+ * @return N/A + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLEmbeddingLookupKernel + * @param[in] input The input tensor info. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] output The output tensor info, Data types supported: same as @p input. + * @param[in] lookups Lookups info. Data types supported: S32. + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *lookups); + + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ + const ICLTensor *_lookups; /**< Lookups tensor */ +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h new file mode 100644 index 000000000..a6ea539f8 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLEXPKERNEL_H__ +#define __ARM_COMPUTE_CLEXPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform an exponential operation */ +class CLExpKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLExpKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLExpKernel(const CLExpKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLExpKernel &operator=(const CLExpKernel &) = delete; + /** Allow instances of this class to be moved */ + CLExpKernel(CLExpKernel &&) = default; + /** Allow instances of this class to be moved */ + CLExpKernel &operator=(CLExpKernel &&) = default; + /** Default destructor */ + ~CLExpKernel() = default; + /** Set the source, destination of the kernel + * + * @param[in] input Source tensor. Data type supported: F32. + * @param[out] output Destination tensor. Data type supported: F32. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLEXPKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h index a51441aca..7e35a80b0 100644 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h @@ -14,52 +14,85 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/** + * @file CLGatherKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLGatherKernel class + */ + #ifndef __ARM_COMPUTE_CLGATHERKERNEL_H__ #define __ARM_COMPUTE_CLGATHERKERNEL_H__ #include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" namespace arm_compute { class ICLTensor; -/** Interface for the gather kernel. - * +/** + * @brief Class to define an interface for the gather kernel. */ class CLGatherKernel : public ICLKernel { public: - /** Default constructor.*/ + /** + * @brief Construct CLGatherKernel object + * */ CLGatherKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + */ CLGatherKernel(const CLGatherKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). 
+ */ CLGatherKernel &operator=(const CLGatherKernel &) = delete; - /** Allow instances of this class to be moved */ + + /** + * @brief Construct CLGatherKernel object by using default move constructor + * @param[in] CLGatherKernel object to move + */ CLGatherKernel(CLGatherKernel &&) = default; - /** Allow instances of this class to be moved */ + + /** + * @brief Move assignment operator + * @param[in] CLGatherKernel object to move + */ CLGatherKernel &operator=(CLGatherKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * + + /** + * @brief Initialise the kernel's input, output and border mode. * @param[in] input1 An input tensor. Data types supported: U8/S32/F32. * @param[in] input2 An input tensor. Data types supported: S32. * @param[out] output The output tensor, Data types supported: same as @p input1. + * @return N/A */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref * CLGatherKernel - * * @param[in] input1 An input tensor. Data types supported: U8/S32/F32. * @param[in] input2 An input tensor. Data types supported: S32. * @param[out] output The output tensor, Data types supported: same as @p input1. - * * @return a status */ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - // Inherited methods overridden: + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). 
+ * @param[in,out] queue Command queue on which to enqueue the kernel. + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h new file mode 100644 index 000000000..c3fc15637 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/** + * @file CLHashtableLookupKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLHashtableLookupKernel class + */ + +#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ +#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +class ICLTensor; + +/** +* @brief Class to perform HashtableLookup operation with opencl kernel +*/ +class CLHashtableLookupKernel : public ICLKernel +{ +public: + /** + * @brief Construct a CLHashtableLookupKernel object + * */ + CLHashtableLookupKernel(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLHashtableLookupKernel(const CLHashtableLookupKernel &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLHashtableLookupKernel &operator=(const CLHashtableLookupKernel &) = delete; + + /** + * @brief Construct a CLHashtableLookupKernel object by using default move constructor + * @param[in] CLHashtableLookupKernel object to move + * */ + CLHashtableLookupKernel(CLHashtableLookupKernel &&) = default; + + /** + * @brief Move assignment operator + * @param[in] CLHashtableLookupKernel object to move + * */ + CLHashtableLookupKernel &operator=(CLHashtableLookupKernel &&) = default; + + /** + * @brief Destruct this object + * */ + ~CLHashtableLookupKernel() = default; + + /** + * @brief Set the input and output of the kernel + * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of + * input. + * @param[in] keys Keys 1D tensor. keys and input pair represent a map. + * Data types supported: S32 + * @param[in] input Source tensor. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p + * input. + * @param[out] hits Hits 1D tensor. 
A boolean tensor that indicates whether the lookup hits + * (True) or not (False). Data types supported: U8/QASYMM8 + * @return N/A + */ + void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input, + ICLTensor *output, ICLTensor *hits); + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLHashtableLookupKernel + * @param[in] lookups The lookups tensor info. Data types supported: S32. + * @param[in] keys The keys tensor info. keys and input pair represent a map. + * Data types supported: S32 + * @param[in] input The input tensor info. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output The output tensor. Data types and data layouts supported: Same as @p + * input. + * @param[out] hits The hits tensor info. A boolean tensor that indicates whether the lookup + * hits + * (True) or not (False). Data types supported: U8/QASYMM8 + * @return a status + */ + static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys, + const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *hits); + + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). 
+ * @param[in,out] queue Command queue on which to enqueue the kernel. + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_lookups; /**< Lookups tensor */ + const ICLTensor *_keys; /**< Keys tensor */ + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ + ICLTensor *_hits; /**< Hits tensor */ + std::unique_ptr<CLTensor> _lookup_indices{nullptr}; /**< Lookup indices tensor */ +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h new file mode 100644 index 000000000..ccbea147e --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__ +#define __ARM_COMPUTE_CLNEGKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a negation operation on a tensor */ +class CLNegKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLNegKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers).
*/ + CLNegKernel(const CLNegKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLNegKernel &operator=(const CLNegKernel &) = delete; + /** Allow instances of this class to be moved */ + CLNegKernel(CLNegKernel &&) = default; + /** Allow instances of this class to be moved */ + CLNegKernel &operator=(CLNegKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLNEGKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h new file mode 100644 index 000000000..181a6226a --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ +#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the normalization layer kernel. + */ +class CLNormalizationLayerExKernel : public ICLKernel +{ +public: + /** Constructor */ + CLNormalizationLayerExKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerExKernel(const CLNormalizationLayerExKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerExKernel &operator=(const CLNormalizationLayerExKernel &) = delete; + /** Default Move Constructor. */ + CLNormalizationLayerExKernel(CLNormalizationLayerExKernel &&) = default; + /** Default move assignment operator */ + CLNormalizationLayerExKernel &operator=(CLNormalizationLayerExKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions + * [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: + * F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as + * input. Data types supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, + * normalization size and other parameters. + */ + void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLNormalizationLayerExKernel + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions + * [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: + * F16/F32. + * @param[in] output Destination tensor.
Output will have the same number of dimensions as + * input. Data types supported: same as @p input. + * @param[in] norm_info Normalization layer information like the normalization type, normalization + * size and other parameters. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + BorderSize _border_size; + bool _is_in_map; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h new file mode 100644 index 000000000..eff1b8bd5 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__ +#define __ARM_COMPUTE_CLPRELU_KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to calculate PReLU*/ +class CLPReLUKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPReLUKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLPReLUKernel(const CLPReLUKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLPReLUKernel &operator=(const CLPReLUKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPReLUKernel(CLPReLUKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPReLUKernel &operator=(CLPReLUKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor1. + * @param[in] alpha Source tensor2. + * @param[out] output Output tensor. + */ + void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + const ICLTensor *_alpha; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h new file mode 100644 index 000000000..cbaa2adee --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h @@ -0,0 +1,60 @@ +/* +* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved +* Copyright (c) 2016-2018 ARM Limited. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +#ifndef __ARM_COMPUTE_CLPADLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLPADLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform PAD operation */ +class CLPadLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPadLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerKernel(const CLPadLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPadLayerKernel(CLPadLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default; + /** Default destructor */ + ~CLPadLayerKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[in] pad_size Padding Size tensor.
Data types supported : S32 + */ + void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ + ICLTensor *_pad_size; /**< Padding Size tensor */ +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLPADLAYERKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h new file mode 100644 index 000000000..3434deee8 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ +#define __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform tensor permutation. 
+ * + * Permutes given a permutation vector + */ +class CLPermuteExKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPermuteExKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPermuteExKernel(const CLPermuteExKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPermuteExKernel &operator=(const CLPermuteExKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPermuteExKernel(CLPermuteExKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPermuteExKernel &operator=(CLPermuteExKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to permute. Data types supported: + * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] perm Permutation vector + */ + void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLPermuteExKernel + * + * @param[in] input First tensor input info. Data types supported: + * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] output Output tensor info. Data types supported: same as @p input.
+ * @param[in] perm Permutation vector + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const PermutationVector &perm); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + PermutationVector _perm; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h index cd2b255bc..d579f5d8f 100644 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h @@ -14,68 +14,106 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/** + * @file CLPixelWiseDivisionKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLPixelWiseDivisionKernel class + */ + #ifndef __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ #define __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ #include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" namespace arm_compute { class ICLTensor; -/** Interface for the pixelwise division kernel. - * +/** + * @brief Interface for the pixelwise division kernel. */ class CLPixelWiseDivisionKernel : public ICLKernel { public: - /** Default constructor.*/ + /** + * @brief Construct a CLPixelWiseDivisionKernel object + */ CLPixelWiseDivisionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + */ CLPixelWiseDivisionKernel(const CLPixelWiseDivisionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + */ CLPixelWiseDivisionKernel &operator=(const CLPixelWiseDivisionKernel &) = delete; - /** Allow instances of this class to be moved */ + + /** + * @brief Construct a CLPixelWiseDivisionKernel object by using move constructor + * @param[in] CLPixelWiseDivisionKernel object to move + */ CLPixelWiseDivisionKernel(CLPixelWiseDivisionKernel &&) = default; - /** Allow instances of this class to be moved */ + + /** + * @brief Allow instances of this class to be moved + * @param[in] CLPixelWiseDivisionKernel object to move + */ CLPixelWiseDivisionKernel &operator=(CLPixelWiseDivisionKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32. + + /** + * @brief Initialise the kernel's input, output and border mode. + * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32. * @param[in] input2 An input tensor. Data types supported: same as @p input1. * @param[out] output The output tensor, Data types supported: same as @p input1. Note: - * U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16). + * U8 requires both inputs to be U8. * @param[in] scale Scale to apply after division. * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. For QS8 and QS16 scale must be 1. + * where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest * even. 
+ * @return N/A */ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - /** Static function to check if given info will lead to a valid configuration of @ref + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref * CLPixelWiseDivisionKernel - * - * @param[in] input1 An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32. + * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32. * @param[in] input2 An input tensor info. Data types supported: same as @p input1. * @param[in] output The output tensor info, Data types supported: same as @p input1. - * Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16). + * Note: U8 requires both inputs to be U8. * @param[in] scale Scale to apply after division. * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. For QS8 and QS16 scale must be 1. + * where n is between 0 and 15. * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * * @return a status */ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - // Inherited methods overridden: + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). 
+ * @param[in,out] queue Command queue on which to enqueue the kernel. + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; + + /** + * @brief The size of the border for that kernel + * @return The width in number of elements of the border. + */ BorderSize border_size() const override; private: diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceMaxKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceMaxKernel.h deleted file mode 100644 index a7d96cc5c..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceMaxKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLREDUCEMAXKERNEL_H__ -#define __ARM_COMPUTE_CLREDUCEMAXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the pixelwise division kernel. - * - */ -class CLReduceMaxKernel : public ICLKernel -{ -public: - /** Default constructor.*/ - CLReduceMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLReduceMaxKernel(const CLReduceMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers).
*/ - CLReduceMaxKernel &operator=(const CLReduceMaxKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReduceMaxKernel(CLReduceMaxKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReduceMaxKernel &operator=(CLReduceMaxKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32. - * @param[in] axis Axis to reduce - * @param[out] output The output tensor, Data types supported: same as @p input1. Note: - * U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16). - */ - void configure(const ICLTensor *input, int32_t axis, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLReduceMaxKernel - * - * @param[in] input An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32. - * @param[in] axis Axis to reduce - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16). - * - * @return a status - */ - static Status validate(const ITensorInfo *input, int32_t axis, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - void run_on_cpu(cl::CommandQueue &queue); - -private: - const ICLTensor *_input; - ICLTensor *_output; - int32_t _axis; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLREDUCEMAXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h new file mode 100644 index 000000000..a26a4a7fc --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLReduceOperationKernel.h + * @brief This file defines CLReduceOperationKernel class + * @ingroup COM_AI_RUNTIME + */ + +#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ +#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to define interface for the reduce operation kernel + */ +class CLReduceOperationKernel : public ICLKernel +{ +public: + /** + * @brief Default constructor + */ + CLReduceOperationKernel(); + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ + CLReduceOperationKernel(const CLReduceOperationKernel &) = delete; + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ + CLReduceOperationKernel &operator=(const CLReduceOperationKernel &) = delete; + /** + * @brief Allow instances of this class to be moved + */ + CLReduceOperationKernel(CLReduceOperationKernel &&) = default; + /** + * @brief Allow instances of this class to be moved + */ + CLReduceOperationKernel &operator=(CLReduceOperationKernel &&) = default; + /** + * @brief Default destructor + */ + ~CLReduceOperationKernel() = default; + + /** + * @brief Set the input and output tensors. + * @param[in] input Source tensor. Data types supported: U8/S32/F32. 
+ * @param[out] output Destination tensor. Data types supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. + * @param[in] op Reduce operation to perform. + * @return N/A + */ + void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, + ReduceOperation op); + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLReduceOperationKernel. + * @param[in] input Source tensor info. Data types supported: U8/S32/F32. + * @param[in] output Destination tensor info. Data types supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. + * @param[in] op Reduce operation to perform. + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis, + ReduceOperation op); + + /** + * @brief Run CLReduceOperationKernel op + * @param[in] window Window to be used for in_slice + * @param[in] queue CLQueue + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + uint32_t _axis; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReductionMeanKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReductionMeanKernel.h deleted file mode 100644 index de9df3381..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReductionMeanKernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__ -#define __ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the reduction operation kernel */ -class CLReductionMeanKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLReductionMeanKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReductionMeanKernel(const CLReductionMeanKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReductionMeanKernel &operator=(const CLReductionMeanKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReductionMeanKernel(CLReductionMeanKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReductionMeanKernel &operator=(CLReductionMeanKernel &&) = default; - /** Default destructor */ - ~CLReductionMeanKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. 
Supported reduction axis : 0, 1 - */ - void configure(const ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis); - - /** Static function to check if given info will lead to a valid configuration of @ref - * CLReductionMeanKernel. - * - * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p - * input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - std::vector<uint32_t> axis); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - std::vector<uint32_t> _reduction_axis; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h new file mode 100644 index 000000000..68534f1ab --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ +#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform SPACE_TO_BATCH_ND operation */ +class CLSpaceToBatchNDKernel final : public ICLKernel +{ +public: + /** Default constructor */ + CLSpaceToBatchNDKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToBatchNDKernel(const CLSpaceToBatchNDKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToBatchNDKernel &operator=(const CLSpaceToBatchNDKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSpaceToBatchNDKernel(CLSpaceToBatchNDKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSpaceToBatchNDKernel &operator=(CLSpaceToBatchNDKernel &&) = default; + /** Default destructor */ + ~CLSpaceToBatchNDKernel() = default; + /** Initialise the kernel's input and output. + * + * @note The data layout of input and output must be the same. + * @note The number of dimensions of input and output must be 4, and `spatial` dimensions + * are height and width. + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32. + * Data layout supported: NCHW/NHWC + * @param[in] block_size Block size tensor. Data types supported: S32. + * @param[in] padding_size Padding size tensor. Data types supported: S32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32. 
+ * Data layout supported: NCHW/NHWC + */ + void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size, + ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + const ICLTensor *_block_size; /**< Block size tensor */ + const ICLTensor *_padding_size; /**< Padding size tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; + +} // namespace arm_compute + +#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h new file mode 100644 index 000000000..be845a549 --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ +#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform spaceTodepth operation */ +class CLSpaceToDepthKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLSpaceToDepthKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default; + /** Default destructor */ + ~CLSpaceToDepthKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h new file mode 100644 index 000000000..a4c44e35d --- /dev/null +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__ +#define __ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to return squared difference value of two tensors (x-y)^2*/ +class CLSquaredDifferenceKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLSquaredDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSquaredDifferenceKernel(const CLSquaredDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSquaredDifferenceKernel &operator=(const CLSquaredDifferenceKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSquaredDifferenceKernel(CLSquaredDifferenceKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSquaredDifferenceKernel &operator=(CLSquaredDifferenceKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input1 Source tensor1. + * @param[in] input2 Source tensor2. + * @param[out] output Output tensor. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + + BorderSize border_size() const override; + +private: + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h index 248ae6635..6368c380e 100644 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceKernel.h +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h @@ -14,36 +14,64 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEKERNEL_H__ -#define __ARM_COMPUTE_CLSTRIDEDSLICEKERNEL_H__ + +/** + * @file CLStridedSliceExKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLStridedSliceExKernel class + */ + +#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ +#define __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ #include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" namespace arm_compute { class ICLTensor; -/** Interface for the kernel to extract a strided slice of a tensor */ -class CLStridedSliceKernel : public ICLKernel +/** +* @brief Class to define an interface for the kernel to extract a strided slice of a tensor +*/ +class CLStridedSliceExKernel : public ICLKernel { public: - /** Default constructor */ - CLStridedSliceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStridedSliceKernel(const CLStridedSliceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStridedSliceKernel &operator=(const CLStridedSliceKernel &) = delete; 
- /** Allow instances of this class to be moved */ - CLStridedSliceKernel(CLStridedSliceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLStridedSliceKernel &operator=(CLStridedSliceKernel &&) = default; - /** Default destructor */ - ~CLStridedSliceKernel() = default; - /** Set the input and output of the kernel - * + /** + * @brief Construct a CLStridedSliceExKernel object + * */ + CLStridedSliceExKernel(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLStridedSliceExKernel(const CLStridedSliceExKernel &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLStridedSliceExKernel &operator=(const CLStridedSliceExKernel &) = delete; + + /** + * @brief Construct a CLStridedSliceExKernel object by using default move constructor + * @param[in] CLStridedSliceExKernel object to move + * */ + CLStridedSliceExKernel(CLStridedSliceExKernel &&) = default; + + /** + * @brief Move assignment operator + * @param[in] CLStridedSliceExKernel object to move + * */ + CLStridedSliceExKernel &operator=(CLStridedSliceExKernel &&) = default; + + /** + * @brief Destruct this object + * */ + ~CLStridedSliceExKernel() = default; + + /** + * @brief Set the input and output of the kernel * @param[in] input Source tensor. Data type supported: - * U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/F16/F32 + * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] beginData The begin tensor. Data types supported: S32. * The number of dimensions must be 1. @@ -57,17 +85,17 @@ public: * @param[in] beginMask Mask for begin * @param[in] endMask Mask for end * @param[in] shrinkAxisMask Mask for shrink axis. 
- * + * @return N/A */ void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask, int32_t shrinkAxisMask); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLStridedSliceKernel - * + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLStridedSliceExKernel * @param[in] input The input tensor info. Data types supported: - * U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/F16/F32 + * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 * @param[in] output The output tensor info, Data types supported: same as @p input1. * @param[in] begin The begin tensor info. Data types supported: S32. * The number of dimensions must be 1. @@ -81,7 +109,6 @@ public: * @param[in] beginMask Mask for begin * @param[in] endMask Mask for end * @param[in] shrinkAxisMask Mask for shrink axis. - * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, @@ -89,7 +116,16 @@ public: const ITensorInfo *stride, int32_t beginMask, int32_t endMask, int32_t shrinkAxisMask); - // Inherited methods overridden: + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). 
+ * @param[in,out] queue Command queue on which to enqueue the kernel. + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: @@ -103,4 +139,4 @@ private: int32_t _shrinkAxisMask; /** Shrink axis mask */ }; } // namespace arm_compute -#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEKERNEL_H__ */ +#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h index 5c567f38e..eb2bad254 100644 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h +++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h @@ -14,14 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/** + * @file CLTopKV2Kernel.h + * @brief This file defines classes for TopKV2Kernel + * @ingroup COM_AI_RUNTIME + */ + #ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__ #define __ARM_COMPUTE_CLTOPKV2KERNEL_H__ -#include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/ICLKernel.h" -#include <array> - // these parameters can be changed #define _ITEMS 16 // number of items in a group #define _GROUPS 4 // the number of virtual processors is _ITEMS * _GROUPS @@ -33,24 +37,59 @@ namespace arm_compute { class ICLTensor; +/** + * @brief Class to define CLTopKV2Single + */ class CLTopKV2Single : public ICLKernel { public: - /** Constructor */ + /** + * @brief Constructor + */ CLTopKV2Single(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied + */ CLTopKV2Single(const CLTopKV2Single &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied + * @return Reference of this instance + */ CLTopKV2Single &operator=(const CLTopKV2Single &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved + */ CLTopKV2Single(CLTopKV2Single &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved + * @return Reference of this instance + */ CLTopKV2Single &operator=(CLTopKV2Single &&) = default; + /** + * @brief Initialise kernel with params + * @param[in] input An input tensor + * @param[in] topk_values Values of the top k predictions + * @param[in] topk_indices Indices of the top k predictions + * @param[in] indices Indices + * @param[in] temp_stack Temp stack + * @param[in] k K of the top k predictions + * @param[in] n Number times to quick-sort + * @return N/A + */ void configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices, cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n); - // Inherited methods overridden: + /** + * @brief Run CLTopKV2Single op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: @@ -59,52 +98,121 @@ private: ICLTensor *_topk_indices; }; +/** + * @brief Class to define CLTopKV2Init + */ class CLTopKV2Init : public ICLKernel { public: - /** Constructor */ + /**
+ * @brief Constructor + */ CLTopKV2Init(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied + */ CLTopKV2Init(const CLTopKV2Init &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied + * @return Reference of this instance + */ CLTopKV2Init &operator=(const CLTopKV2Init &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved + */ CLTopKV2Init(CLTopKV2Init &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved + * @return Reference of this instance + */ CLTopKV2Init &operator=(CLTopKV2Init &&) = default; + /** + * @brief Initialise kernel with params + * @param[in] input An input tensor + * @param[in] in_key_buf Buffer of input key + * @param[in] in_ind_buf Buffer of input index + * @param[in] n Number times to quick-sort + * @return N/A + */ void configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, int n); - // Inherited methods overridden: + /** + * @brief Run CLTopKV2Init op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: ICLTensor *_input; }; +/** + * @brief Class to define CLRadixSortHistogram + */ class CLRadixSortHistogram : public ICLKernel { public: - /** Constructor */ + /**
+ * @brief Constructor + */ CLRadixSortHistogram(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied + */ CLRadixSortHistogram(const CLRadixSortHistogram &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied + * @return Reference of this instance + */ CLRadixSortHistogram &operator=(const CLRadixSortHistogram &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved + */ CLRadixSortHistogram(CLRadixSortHistogram &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved + * @return Reference of this instance + */ CLRadixSortHistogram &operator=(CLRadixSortHistogram &&) = default; + /** + * @brief Initialise kernel with params + * @param[out] hist_buf Buffer of histogram + * @param[in] bits Number of bits to be used for radix sort + * @param[in] n Integer number size to sort + * @return N/A + */ void configure(cl::Buffer *hist_buf, int bits, int n); + /** + * @brief Set pass + * @param[in] pass Passes made of in radix sort algorithm + * @param[in] in_key_buf Buffer of input key + * @return N/A + */ void setPass(int pass, cl::Buffer *in_key_buf) { _pass = pass; _in_key_buf = in_key_buf; } - // Inherited methods overridden: + /** + * @brief Run CLRadixSortHistogram op + * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: @@ -112,82 +220,210 @@ private: cl::Buffer *_in_key_buf; }; +/** + * @brief Class to define CLRadixSortScanHistogram + */ class CLRadixSortScanHistogram : public ICLKernel { public: - /** Constructor */ + /** + * @brief Constructor + */ CLRadixSortScanHistogram(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied + */ CLRadixSortScanHistogram(const CLRadixSortScanHistogram &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied + * @return Reference of this instance + */ CLRadixSortScanHistogram &operator=(const CLRadixSortScanHistogram &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved + */ CLRadixSortScanHistogram(CLRadixSortScanHistogram &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved + * @return Reference of this instance + */ CLRadixSortScanHistogram &operator=(CLRadixSortScanHistogram &&) = default; + /** + * @brief Initialise kernel with params + * @param[out] hist_buf Buffer of histogram + * @param[out] glob_sum_buf Buffer of global sum + * @param[in] bits Number of bits to be used for radix sort + * @return N/A + */ void configure(cl::Buffer
*hist_buf, cl::Buffer *glob_sum_buf, int bits); - // Inherited methods overridden: + /** + * @brief Run CLRadixSortScanHistogram op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; }; +/** + * @brief Class to define CLRadixSortGlobalScanHistogram + */ class CLRadixSortGlobalScanHistogram : public ICLKernel { public: - /** Constructor */ + /** + * @brief Constructor + */ CLRadixSortGlobalScanHistogram(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied + */ CLRadixSortGlobalScanHistogram(const CLRadixSortGlobalScanHistogram &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied + * @return Reference of this instance + */ CLRadixSortGlobalScanHistogram &operator=(const CLRadixSortGlobalScanHistogram &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved + */ CLRadixSortGlobalScanHistogram(CLRadixSortGlobalScanHistogram &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved + * @return Reference of this instance + */ CLRadixSortGlobalScanHistogram &operator=(CLRadixSortGlobalScanHistogram &&) = default; + /** + * @brief Initialise kernel with params + * @param[out] glob_sum_buf Buffer of global sum + * @param[out] temp_buf Temp buffer to be used while RadixSortGlobalScanHistogram + * @param[in] bits Number of bits to be used for radix sort + * @return N/A + */ void configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, int bits); - // Inherited methods overridden: + /** + * @brief Run CLRadixSortGlobalScanHistogram op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; }; +/** + * @brief Class to define CLRadixSortPasteHistogram + */ class CLRadixSortPasteHistogram : public ICLKernel { public: - /** Constructor */ + /** + * @brief Constructor + */ CLRadixSortPasteHistogram(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied + */ CLRadixSortPasteHistogram(const CLRadixSortPasteHistogram &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied + * @return Reference of this instance + */ CLRadixSortPasteHistogram &operator=(const CLRadixSortPasteHistogram &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved + */ CLRadixSortPasteHistogram(CLRadixSortPasteHistogram &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved + * @return Reference of this instance + */ CLRadixSortPasteHistogram &operator=(CLRadixSortPasteHistogram &&) = default; + /** + * @brief Initialise kernel with params + * @param[out] hist_buf Buffer of histogram + * @param[out] glob_sum_buf Buffer of global sum + * @param[in] bits Number of bits to be used for radix sort + * @return N/A + */ void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits); - // Inherited methods overridden: + /** + * @brief Run CLRadixSortPasteHistogram op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; }; +/** + * @brief Class to define CLRadixSortReorder + */ class CLRadixSortReorder : public ICLKernel { public: - /** Constructor */ + /** + * @brief Constructor + */ CLRadixSortReorder(); - /** Prevent instances of this class from being copied (As this class contains
pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied + */ CLRadixSortReorder(const CLRadixSortReorder &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied + * @return Reference of this instance + */ CLRadixSortReorder &operator=(const CLRadixSortReorder &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved + */ CLRadixSortReorder(CLRadixSortReorder &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved + * @return Reference of this instance + */ CLRadixSortReorder &operator=(CLRadixSortReorder &&) = default; + /** + * @brief Initialise kernel with params + * @param[out] hist_buf Buffer of histogram + * @param[in] bits Number of bits to be used for radix sort + * @param[in] n Integer number size to sort + * @return N/A + */ void configure(cl::Buffer *hist_buf, int bits, int n); + /** + * @brief Set pass + * @param[in] pass Passes made in radix sort algorithm + * @param[in] in_key_buf Buffer of input key + * @param[out] out_key_buf Buffer of output key + * @param[in] in_ind_buf Buffer of input index + * @param[out] out_ind_buf Buffer of output index + * @return N/A + */ void setPass(int pass, cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf, cl::Buffer *out_ind_buf) { @@ -197,7 +433,12 @@ public: _in_ind_buf = in_ind_buf; _out_ind_buf = out_ind_buf; } - // Inherited methods 
overridden: + /** + * @brief Run CLRadixSortReorder op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: @@ -208,47 +449,115 @@ private: cl::Buffer *_out_ind_buf; }; +/** + * @brief Class to define CLTopKV2FindFirstNegative + */ class CLTopKV2FindFirstNegative : public ICLKernel { public: - /** Constructor */ + /** + * @brief Constructor + */ CLTopKV2FindFirstNegative(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied + */ CLTopKV2FindFirstNegative(const CLTopKV2FindFirstNegative &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). 
+ * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied + * @return Reference of this instance + */ CLTopKV2FindFirstNegative &operator=(const CLTopKV2FindFirstNegative &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved + */ CLTopKV2FindFirstNegative(CLTopKV2FindFirstNegative &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved + * @return Reference of this instance + */ CLTopKV2FindFirstNegative &operator=(CLTopKV2FindFirstNegative &&) = default; + /** + * @brief Initialise kernel with params + * @param[out] first_negative_idx_buf Buffer of the first negative index + * @param[in] n Number of times to find + * @return N/A + */ void configure(cl::Buffer *first_negative_idx_buf, int n); + /** + * @brief Set output buffer + * @param[out] out_key_buf Buffer of output key + * @return N/A + */ void setOutputBuffer(cl::Buffer *out_key_buf) { _out_key_buf = out_key_buf; } - // Inherited methods overridden: + /** + * @brief Run CLTopKV2FindFirstNegative op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: cl::Buffer *_out_key_buf; }; +/** + * @brief Class to define CLTopKV2ReorderNegatives + */ class CLTopKV2ReorderNegatives : public ICLKernel { public: - /** Constructor */ + /** + * @brief Constructor + */ CLTopKV2ReorderNegatives(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). 
+ * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied + */ CLTopKV2ReorderNegatives(const CLTopKV2ReorderNegatives &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied + * @return Reference of this instance + */ CLTopKV2ReorderNegatives &operator=(const CLTopKV2ReorderNegatives &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved + */ CLTopKV2ReorderNegatives(CLTopKV2ReorderNegatives &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved + * @return Reference of this instance + */ CLTopKV2ReorderNegatives &operator=(CLTopKV2ReorderNegatives &&) = default; + /** + * @brief Initialise kernel with params + * @param[out] first_negative_idx_buf Buffer of the first negative index + * @param[in] n Number of times to find + * @return N/A + */ void configure(cl::Buffer *first_negative_idx_buf, int n); + /** + * @brief Set buffers + * @param[in] in_key_buf Buffer of input key + * @param[out] out_key_buf Buffer of output key + * @param[in] in_ind_buf Buffer of input index + * @param[out] out_ind_buf Buffer of output index + * @return N/A + */ void setBuffers(cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf, cl::Buffer *out_ind_buf) { @@ -258,7 +567,12 @@ public: _out_ind_buf = out_ind_buf; } - // Inherited methods overridden: + /** + * @brief Run CLTopKV2ReorderNegatives op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A 
+ */ void run(const Window &window, cl::CommandQueue &queue) override; private: @@ -268,25 +582,63 @@ private: cl::Buffer *_out_ind_buf; }; +/** + * @brief Class to define CLTopKV2Store + */ class CLTopKV2Store : public ICLKernel { public: - /** Constructor */ + /** + * @brief Constructor + */ CLTopKV2Store(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied + */ CLTopKV2Store(const CLTopKV2Store &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied + * @return Reference of this instance + */ CLTopKV2Store &operator=(const CLTopKV2Store &) = delete; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved + */ CLTopKV2Store(CLTopKV2Store &&) = default; - /** Allow instances of this class to be moved */ + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved + * @return Reference of this instance + */ CLTopKV2Store &operator=(CLTopKV2Store &&) = default; + /** + * @brief Initialise kernel with params + * @param[out] values Values tensor to store + * @param[out] indices Indices tensor to be used for store + * @param[in] k K of the top k predictions + * @param[in] n Number of times to store + * @return N/A + */ void configure(ICLTensor *values, ICLTensor *indices, int k, int n); + /** + * @brief Set buffers + * @param[out] out_key_buf Buffer of output key + * @param[out] out_ind_buf Buffer of output index + * @return N/A + 
*/ void setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf); - // Inherited methods overridden: + /** + * @brief Run CLTopKV2Store op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ void run(const Window &window, cl::CommandQueue &queue) override; private: |