path: root/libs/ARMComputeEx/arm_compute/runtime/CL/functions
Diffstat (limited to 'libs/ARMComputeEx/arm_compute/runtime/CL/functions')
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h       63
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h              114
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h 62
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h          45
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h         41
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h                    27
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h            42
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h            44
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h         54
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h                     38
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h                  37
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h         59
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h                     39
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h    77
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h                   40
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h              47
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h               51
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h       34
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceMax.h               81
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h         87
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReductionMean.h           73
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h          56
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h            44
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h       40
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSlice.h            69
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h          58
-rw-r--r--  libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h                  60
27 files changed, 1206 insertions, 276 deletions
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h
new file mode 100644
index 000000000..7e578550f
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__
+#define __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLActivationLayerExKernel
+ *
+ * @note The function simulates an activation layer with the specified activation function.
+ */
+class CLActivationLayerEx : public ICLSimpleFunction
+{
+public:
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr or is equal to the input, the activation function will
+ * be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will
+ * store the result
+ * of the activation function. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] act_info Activation layer parameters.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfoEx act_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLActivationLayerEx
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor
+ * will store the result
+ * of the activation function. Data types supported: QASYMM8/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfoEx &act_info);
+};
+}
+#endif /* __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h
new file mode 100644
index 000000000..8044c58af
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLArgMinMax.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLArgMinMax class
+ */
+
+#ifndef __ARM_COMPUTE_CLARG_MIN_MAX_H__
+#define __ARM_COMPUTE_CLARG_MIN_MAX_H__
+
+#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to execute CLArgMinMax operation
+ */
+class CLArgMinMax : public IFunction
+{
+public:
+ /**
+ * @brief Construct a new CLArgMinMax object
+ */
+ CLArgMinMax();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLArgMinMax(const CLArgMinMax &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLArgMinMax &operator=(const CLArgMinMax &) = delete;
+
+ /**
+ * @brief Construct a new CLArgMinMax object by using move constructor
+ * @param[in] CLArgMinMax object to move
+ */
+ CLArgMinMax(CLArgMinMax &&) = default;
+
+ /**
+ * @brief Assign a CLArgMinMax object.
+ * @param[in] CLArgMinMax object to assign. This object will be moved.
+ */
+ CLArgMinMax &operator=(CLArgMinMax &&) = default;
+
+ /**
+ * @brief Initialise the kernel's inputs and outputs.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[out] output The result of the ArgMinMax operation. Data types supported: same as @p
+ * input.
+ * @param[in] argminmax_axis Axes to perform ArgMin/ArgMax on. They must be sorted and contain
+ * no duplicates.
+ * @param[in] op Operation to perform: ArgMin or ArgMax.
+ * @return N/A
+ */
+ void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> argminmax_axis,
+ ArgOperation op);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] argminmax_axis Axes to perform ArgMin/ArgMax on
+ * @param[in] output The result of the ArgMinMax operation. Data types supported: same as @p
+ * input.
+ * @param[in] op Operation to perform: ArgMin or ArgMax.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis,
+ const ITensorInfo *output, ArgOperation op);
+
+ /**
+ * @brief Run the kernels contained in the function
+ * This operation runs on the CPU or the GPU depending on the value of the
+ * argminmax_MAX_RUN_ON_CPU macro in CLArgMinMax.cpp.
+ * If argminmax_MAX_RUN_ON_CPU == 1, the CPU runs this operation.
+ * Otherwise the GPU runs this operation.
+ * @return N/A
+ */
+ void run() override;
+
+private:
+ ICLTensor *_input;
+ ICLTensor *_output;
+ std::vector<uint32_t> _argminmax_axis;
+ ArgOperation _arg_op;
+
+ std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
+ std::unique_ptr<CLArgMinMaxKernel[]> _argminmax_kernels{nullptr};
+ size_t _num_of_kernels;
+};
+}
+#endif /* __ARM_COMPUTE_CLARG_MIN_MAX_H__ */
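A minimal usage sketch for CLArgMinMax (illustration only, not part of this diff). It assumes the standard ACL CLScheduler/CLTensor workflow; the tensor shapes and the ArgOperation::MAX enumerator are assumptions standing in for whatever TypesEx.h actually defines.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLArgMinMax.h"

    using namespace arm_compute;

    void run_argminmax_sketch()
    {
      CLScheduler::get().default_init();                 // create the CL context and queue

      CLTensor input, output;
      input.allocator()->init(TensorInfo(TensorShape(10U, 4U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));  // axis 0 reduced away

      CLArgMinMax argminmax;
      argminmax.configure(&input, &output, {0U}, ArgOperation::MAX);  // enumerator name is assumed

      input.allocator()->allocate();
      output.allocator()->allocate();

      argminmax.run();                                   // CPU or GPU path, per the macro documented above
      CLScheduler::get().sync();
    }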
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h
new file mode 100644
index 000000000..34e6c6334
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__
+#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLArithmeticSubtractionExKernel
+ *
+ * @note The tensor data type for the inputs must be U8/S16/F16/F32.
+ * @note The function performs an arithmetic subtraction between two tensors.
+ */
+class CLArithmeticSubtractionEx : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32.
+ * The input tensor is [in, out] because its TensorInfo might be modified
+ * inside the kernel in case of broadcasting of dimension 0.
+ * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
+ * The input tensor is [in, out] because its TensorInfo might be modified
+ * inside the kernel in case of broadcasting of dimension 0.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8),
+ * S16/F16/F32.
+ * @param[in] policy Policy to use to handle overflow.
+ */
+ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLArithmeticSubtractionEx
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8),
+ * S16/F16/F32.
+ * @param[in] policy Policy to use to handle overflow.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output, ConvertPolicy policy);
+};
+}
+#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__ */
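A hedged usage sketch for CLArithmeticSubtractionEx (illustrative, not from the diff). It assumes the usual CLScheduler/CLTensor setup; ConvertPolicy::SATURATE is the core-library enum referenced by the configure() signature above. The static validate() can be called with the tensors' info() before configure() to check the configuration.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h"

    using namespace arm_compute;

    void run_subtraction_sketch()
    {
      CLScheduler::get().default_init();

      CLTensor a, b, out;
      const TensorInfo info(TensorShape(32U, 32U), 1, DataType::F32);
      a.allocator()->init(info);
      b.allocator()->init(info);
      out.allocator()->init(info);

      CLArithmeticSubtractionEx sub;
      sub.configure(&a, &b, &out, ConvertPolicy::SATURATE);  // saturate on overflow

      a.allocator()->allocate();
      b.allocator()->allocate();
      out.allocator()->allocate();

      sub.run();
      CLScheduler::get().sync();
    }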
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
new file mode 100644
index 000000000..d16a0762d
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
+#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBatchToSpaceNDKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
+ * @note The function rearranges data from the batch dimension into blocks of the spatial dimensions.
+ */
+class CLBatchToSpaceND : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size A pointer to an array of integer values specifying block sizes
+ * for the spatial dimensions.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size);
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */
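A usage sketch for CLBatchToSpaceND (illustrative, not part of the diff). The shapes are arbitrary assumptions following ACL's (width, height, channels, batch) shape order; block_size is the raw int32_t array the configure() above expects.

    #include <cstdint>

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h"

    using namespace arm_compute;

    void run_batch_to_space_sketch()
    {
      CLScheduler::get().default_init();

      CLTensor input, output;
      // Four batches of 2x2x8 become one batch of 4x4x8 with a 2x2 block (assumed shapes).
      input.allocator()->init(TensorInfo(TensorShape(2U, 2U, 8U, 4U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(4U, 4U, 8U, 1U), 1, DataType::F32));

      const int32_t block_size[] = {2, 2};  // block sizes for the spatial dimensions

      CLBatchToSpaceND b2s;
      b2s.configure(&input, &output, block_size);

      input.allocator()->allocate();
      output.allocator()->allocate();

      b2s.run();
      CLScheduler::get().sync();
    }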
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
new file mode 100644
index 000000000..061e34f26
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLBINARYLOGICALOP_H__
+#define __ARM_COMPUTE_CLBINARYLOGICALOP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLBinaryLogicalOp : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's source and destination.
+ *
+ * @param[in] input1 Source tensor1. Data types supported: U8, QASYMM8.
+ * @param[in] input2 Source tensor2. Data types supported: U8, QASYMM8.
+ * @param[out] output Output tensor. Data types supported: U8, QASYMM8.
+ * @param[in] op Binary logical operation to perform.
+ */
+ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+ BinaryLogicalOperation op);
+};
+
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLBINARYLOGICALOP_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
index 63050067d..56b8408e2 100644
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
@@ -14,30 +14,35 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/**
+ * @file CLCast.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLCast class
+ */
+
#ifndef __ARM_COMPUTE_CLCAST_H__
#define __ARM_COMPUTE_CLCAST_H__
-#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
class ICLTensor;
-/** Basic function to run @ref CLCastKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
+/**
+ * @brief Class to run @ref CLCastKernel.
+ * This converts the input tensor to the tensor of the output tensor's type.
*/
class CLCast : public ICLSimpleFunction
{
public:
- /** Initialise the kernel's input and output.
- *
- * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * The input tensor is [in, out] because its TensorInfo might be modified
- * inside the kernel.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ /**
+ * @brief Initialise the kernel's input and output
+ * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * The input tensor is [in, out] because its TensorInfo might be
+ * modified inside the kernel.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
*/
void configure(ICLTensor *input, ICLTensor *output);
};
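A usage sketch for the updated CLCast interface (illustration, not part of the diff); it assumes the standard CLScheduler/CLTensor boilerplate, with the target type taken from the output tensor's TensorInfo.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLCast.h"

    using namespace arm_compute;

    void run_cast_sketch()
    {
      CLScheduler::get().default_init();

      CLTensor src, dst;
      src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
      dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));  // cast target type

      CLCast cast;
      cast.configure(&src, &dst);  // output type is read from dst's TensorInfo

      src.allocator()->allocate();
      dst.allocator()->allocate();

      cast.run();
      CLScheduler::get().sync();
    }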
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h
new file mode 100644
index 000000000..1b0d70e7f
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLCOMPARISON_OP_H__
+#define __ARM_COMPUTE_CLCOMPARISON_OP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLComparisonOp : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's source and destination.
+ *
+ * @param[in] input1 Source tensor1. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input2 Source tensor2. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ * @param[in] op Comparison operation to perform.
+ */
+ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+ const ComparisonOperation &op);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLCOMPARISON_OP_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
new file mode 100644
index 000000000..d78a6ada4
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__
+#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDepthToSpaceKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
+ * @note The function rearranges data from the depth dimension into spatial blocks of the output.
+ */
+class CLDepthToSpace : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size Block size; only integer values are supported.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
+};
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
new file mode 100644
index 000000000..257772a89
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLEmbeddingLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLEmbeddingLookup class
+ */
+
+#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__
+#define __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform EmbeddingLookup operation
+ */
+class CLEmbeddingLookup : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] lookups Lookups 1D tensor whose values are indices into the first dimension of
+ * input.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
+};
+}
+#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */
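A usage sketch for CLEmbeddingLookup (illustrative, not from the diff): rows of a table tensor are gathered by a 1D lookups tensor. The S32 index type follows the doc comment; the table/output shapes are assumptions.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"

    using namespace arm_compute;

    void run_embedding_lookup_sketch()
    {
      CLScheduler::get().default_init();

      CLTensor table, lookups, output;
      table.allocator()->init(TensorInfo(TensorShape(64U, 100U), 1, DataType::F32));  // 100 rows of 64
      lookups.allocator()->init(TensorInfo(TensorShape(5U), 1, DataType::S32));       // 5 row indices
      output.allocator()->init(TensorInfo(TensorShape(64U, 5U), 1, DataType::F32));

      CLEmbeddingLookup lookup;
      lookup.configure(&table, &output, &lookups);

      table.allocator()->allocate();
      lookups.allocator()->allocate();
      output.allocator()->allocate();

      lookup.run();
      CLScheduler::get().sync();
    }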
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h
new file mode 100644
index 000000000..2d0fc23a4
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLEXP_H__
+#define __ARM_COMPUTE_CLEXP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLExpKernel */
+class CLExp : public ICLSimpleFunction
+{
+public:
+ /** Set the source and destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: F32.
+ * @param[out] output Destination tensor. Data type supported: F32.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLEXP_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h
index 3ae7afe14..f7fd3cda1 100644
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h
@@ -14,32 +14,43 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/**
+ * @file CLGather.h
+ * @brief This file contains CLGather class
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __ARM_COMPUTE_CLGATHER_H__
#define __ARM_COMPUTE_CLGATHER_H__
-#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
class ICLTensor;
-/** Basic function to run @ref CLGatherKernel. */
+/**
+ * @brief Class to run @ref CLGatherKernel.
+ */
class CLGather : public ICLSimpleFunction
{
public:
- /** Initialise the kernel's inputs, output and convertion policy.
- *
- * @param[in] input1 An input tensor. Data types supported: U8/S32/F32.
- * @param[in] input2 An indexes tensor. Data types supported: S32.
- * @param[out] output The output tensor, Data types supported: same as @p input1.
- */
+ /**
+ * @brief Initialise the kernel's inputs and output.
+ * @param[in] input1 An input tensor. Data types supported: U8/S32/F32.
+ * @param[in] input2 An indexes tensor. Data types supported: S32.
+ * @param[out] output The output tensor, Data types supported: same as @p input1.
+ * @return N/A
+ */
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGather
- *
- * @param[in] input1 An input tensor. Data types supported: U8/S32/F32.
- * @param[in] input2 An indexes tensor. Data types supported: S32.
- * @param[out] output The output tensor, Data types supported: same as @p input1.
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration
+ * of @ref CLGather
+ * @param[in] input1 An input tensor. Data types supported: U8/S32/F32.
+ * @param[in] input2 An indexes tensor. Data types supported: S32.
+ * @param[out] output The output tensor, Data types supported: same as @p input1.
* @return a status
*/
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
new file mode 100644
index 000000000..65aa6cbd5
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLHashtableLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLHashtableLookup class
+ */
+
+#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUP_H__
+#define __ARM_COMPUTE_CLHASHTABLELOOKUP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform HashtableLookup operation
+ */
+class CLHashtableLookup : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] lookups Lookups 1D tensor whose values are indices into the first dimension of
+ * input.
+ * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
+ * (True) or not (False). Data types supported: U8/QASYMM8
+ * @return N/A
+ */
+ void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input,
+ ICLTensor *output, ICLTensor *hits);
+};
+}
+#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */
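A usage sketch for CLHashtableLookup (illustrative, not part of the diff). The tensor shapes are assumptions; the hits tensor is U8 per the doc comment, and keys/lookups are S32.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLHashtableLookup.h"

    using namespace arm_compute;

    void run_hashtable_lookup_sketch()
    {
      CLScheduler::get().default_init();

      CLTensor lookups, keys, values, output, hits;
      lookups.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::S32));        // queried keys
      keys.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::S32));          // stored keys
      values.allocator()->init(TensorInfo(TensorShape(16U, 10U), 1, DataType::F32));   // one row per key
      output.allocator()->init(TensorInfo(TensorShape(16U, 3U), 1, DataType::F32));
      hits.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::U8));            // hit/miss flags

      CLHashtableLookup hashtable;
      hashtable.configure(&lookups, &keys, &values, &output, &hits);

      lookups.allocator()->allocate();
      keys.allocator()->allocate();
      values.allocator()->allocate();
      output.allocator()->allocate();
      hits.allocator()->allocate();

      hashtable.run();
      CLScheduler::get().sync();
    }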
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
new file mode 100644
index 000000000..198a0fd4e
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNEG_H__
+#define __ARM_COMPUTE_CLNEG_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLNeg : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's source and destination.
+ *
+ * @param[in] input Source tensor. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(ICLTensor *input, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLNEG_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h
new file mode 100644
index 000000000..4077245d5
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute a normalization layer. This function calls the following CL kernels:
+ *
+ * -# @ref CLFillBorderKernel
+ * -# @ref CLNormalizationLayerExKernel
+ *
+ */
+class CLNormalizationLayerEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLNormalizationLayerEx();
+ /** Set the input and output tensors.
+ *
+ * @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions
+ * [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types
+ * supported: F16/F32 (Written to by the border handler)
+ * @param[out] output Destination tensor. Dimensions, data type and number of channels must
+ * match the input ones.
+ * @param[in] norm_info Normalization layer information like the normalization type,
+ * normalization size and other parameters.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLNormalizationLayerEx
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
+ * [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported:
+ * F16/F32
+ * @param[in] output Destination tensor. Dimensions, data type and number of channels must
+ * match the input ones.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization
+ * size and other parameters.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const NormalizationLayerInfo &norm_info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLNormalizationLayerExKernel _norm_kernel; /**< Normalization layer kernel to run */
+ CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
+};
+}
+#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
new file mode 100644
index 000000000..622a61b5e
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLPRELU_H__
+#define __ARM_COMPUTE_CLPRELU_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLPReLU : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's source and destination.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] alpha Alpha (slope) tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ */
+ void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLPRELU_H__*/
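A usage sketch for CLPReLU (illustration only, not from the diff); the per-channel alpha shape shown here is an assumption, the header itself only fixes the three-tensor configure() signature.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLPReLU.h"

    using namespace arm_compute;

    void run_prelu_sketch()
    {
      CLScheduler::get().default_init();

      CLTensor input, alpha, output;
      input.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));
      alpha.allocator()->init(TensorInfo(TensorShape(1U, 1U, 16U), 1, DataType::F32));  // assumed per-channel slopes
      output.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));

      CLPReLU prelu;
      prelu.configure(&input, &alpha, &output);

      input.allocator()->allocate();
      alpha.allocator()->allocate();
      output.allocator()->allocate();

      prelu.run();
      CLScheduler::get().sync();
    }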
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
new file mode 100644
index 000000000..d6ea486d1
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
@@ -0,0 +1,47 @@
+/*
+* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+* Copyright (c) 2016-2018 ARM Limited.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#ifndef __ARM_COMPUTE_CLPADLAYEREX_H__
+#define __ARM_COMPUTE_CLPADLAYEREX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLPadLayerKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
+ * @note The function pads the input tensor with the given padding sizes.
+ */
+class CLPadLayerEx : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported:
+ * U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported:
+ * U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] pad_size Tensor of padding values in NHWC format with shape [n, 2],
+ * where n is the rank of the tensor. Data types supported: S32
+ */
+ void configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLPADLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h
new file mode 100644
index 000000000..9a0cc213c
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLPERMUTEEX_H__
+#define __ARM_COMPUTE_CLPERMUTEEX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute an @ref CLPermuteKernel. */
+class CLPermuteEx : public ICLSimpleFunction
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input The input tensor to permute. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[out] output The output tensor. Data types supported: Same as @p input
+ * @param[in] perm Permutation vector
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteEx.
+ *
+ * @param[in] input First tensor input info. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] output Output tensor info. Data types supported: same as @p input.
+ * @param[in] perm Permutation vector
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PermutationVector &perm);
+};
+}
+#endif /*__ARM_COMPUTE_CLPERMUTEEX_H__ */
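A usage sketch for CLPermuteEx (illustrative, not part of the diff). It swaps the first two dimensions with ACL's PermutationVector; the shapes are assumptions, and validate() can be called with the tensors' info() and the same vector before configure().

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLPermuteEx.h"

    using namespace arm_compute;

    void run_permute_sketch()
    {
      CLScheduler::get().default_init();

      const PermutationVector perm(1U, 0U, 2U);  // swap width and height, keep channels

      CLTensor input, output;
      input.allocator()->init(TensorInfo(TensorShape(32U, 28U, 3U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(28U, 32U, 3U), 1, DataType::F32));

      CLPermuteEx permute;
      permute.configure(&input, &output, perm);

      input.allocator()->allocate();
      output.allocator()->allocate();

      permute.run();
      CLScheduler::get().sync();
    }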
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
index c1383e21f..b142d3a2e 100644
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
@@ -14,53 +14,61 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/**
+ * @file CLPixelWiseDivision.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLPixelWiseDivision class
+ */
#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
class ICLTensor;
-/** Basic function to run @ref CLPixelWiseDivisionKernel. */
+/**
+ * @brief Class to run @ref CLPixelWiseDivisionKernel.
+ */
class CLPixelWiseDivision : public ICLSimpleFunction
{
public:
- /** Initialise the kernel's inputs, output and convertion policy.
- *
- * @param[in, out] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ /**
+ * @brief Initialise the kernel's inputs, output and conversion policy.
+ * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32
* The input tensor is [in, out] because its TensorInfo might be
* modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be
* modified inside the kernel in case of broadcasting of dimension 0.
* @param[out] output The output tensor, Data types supported: same as @p input1.
- * Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+ * Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or
- * 1/2^n where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
* @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
* even.
+ * @return N/A
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f,
ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
- /** Static function to check if given info will lead to a valid configuration of @ref
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
* CLPixelWiseDivision
- *
- * @param[in] input1 An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32
* @param[in] input2 An input tensor info. Data types supported: same as @p input1.
* @param[in] output The output tensor info, Data types supported: same as @p input1.
- * Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+ * Note: U8 requires both inputs to be U8.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
* @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- *
* @return a status
*/
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceMax.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceMax.h
deleted file mode 100644
index 14b473f33..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceMax.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLREDUCE_MAX_H__
-#define __ARM_COMPUTE_CLREDUCE_MAX_H__
-
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute TopK operation. This function calls the following OpenCL kernels:
- *
- * -# @ref CLTopKV2Kernel
- */
-class CLReduceMax : public IFunction
-{
-public:
- /** Constructor */
- CLReduceMax();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReduceMax(const CLReduceMax &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReduceMax &operator=(const CLReduceMax &) = delete;
- /** Allow instances of this class to be moved */
- CLReduceMax(CLReduceMax &&) = default;
- /** Allow instances of this class to be moved */
- CLReduceMax &operator=(CLReduceMax &&) = default;
- /** Initialise the kernel's inputs and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations
- * is limited to the given array size.
- *
- * @param[in] input Input image. Data types supported: F32
- * @param[in] axis Axis to reduce. Data type supported: S32
- * @param[out] output indices related to top k values. Data types supported: F32.
- */
- void configure(ICLTensor *input, int32_t axis, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLPixelWiseDivision
- *
- * @param[in] input Input image. Data types supported: F32
- * @param[in] axis Axis to reduce. Data type supported: S32
- * @param[out] output indices related to top k values. Data types supported: F32. *
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, int32_t axis, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- void run_on_cpu();
-
- int32_t _axis;
-
- ICLTensor *_input;
- ICLTensor *_output;
-
- std::unique_ptr<ICLKernel> _kernel;
-};
-}
-#endif /*__ARM_COMPUTE_CLREDUCE_MAX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
new file mode 100644
index 000000000..e1a6f6ab4
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLReduceOperation.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLReduceOperation class
+ */
+
+#ifndef __ARM_COMPUTE_CLREDUCEOPERATION_H__
+#define __ARM_COMPUTE_CLREDUCEOPERATION_H__
+
+#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
+#include "arm_compute/core/TypesEx.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform ReduceOperation
+ */
+class CLReduceOperation : public IFunction
+{
+public:
+ /**
+ * @brief Construct a new ReduceOperation object
+ */
+ CLReduceOperation();
+
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] input Source tensor. Data types supported: U8/S32/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] axis Axes along which to reduce. They must be sorted and contain no duplicates.
+ * @param[in] op Reduce operation to perform.
+ * @return N/A
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis,
+ ReduceOperation op);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLReduceOperation.
+ * @param[in] input Source tensor info. Data types supported: U8/S32/F32
+ * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] axis Axes along which to reduce. They must be sorted and contain no duplicates.
+ * @param[in] op Reduce operation to perform.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const std::set<uint32_t> &axis, const ReduceOperation &op);
+
+ /**
+ * @brief Run the OpenCL kernel for this operation
+ * @return N/A
+ */
+ void run() override;
+
+private:
+ ICLTensor *_input;
+ ICLTensor *_output;
+ std::set<uint32_t> _axis;
+
+ std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
+ std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr};
+};
+}
+#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */
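A usage sketch for CLReduceOperation (illustrative, not from the diff). The shapes are assumptions, and ReduceOperation::MEAN is a hypothetical enumerator standing in for whatever TypesEx.h defines.

    #include <set>

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLReduceOperation.h"

    using namespace arm_compute;

    void run_reduce_sketch()
    {
      CLScheduler::get().default_init();

      CLTensor input, output;
      input.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(1U, 1U, 16U), 1, DataType::F32));  // reduced over W and H

      const std::set<uint32_t> axis{0U, 1U};  // sorted, no duplicates, as the doc requires

      CLReduceOperation reduce;
      reduce.configure(&input, &output, axis, ReduceOperation::MEAN);  // enumerator name is assumed

      input.allocator()->allocate();
      output.allocator()->allocate();

      reduce.run();  // enqueues the internal reduction kernels
      CLScheduler::get().sync();
    }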
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReductionMean.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReductionMean.h
deleted file mode 100644
index 2081518c1..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReductionMean.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ARM_COMPUTE_CLREDUCTIONMEAN_H__
-#define __ARM_COMPUTE_CLREDUCTIONMEAN_H__
-
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLReductionMeanKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Perform reduction operation.
- */
-class CLReductionMean : public IFunction
-{
-public:
- /** Default Constructor.
- */
- CLReductionMean();
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1
- */
- void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLReductionMean.
- *
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
- * input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLReductionMeanKernel _reduction_mean_kernel;
- CLFillBorderKernel _fill_border_kernel;
-};
-}
-#endif /*__ARM_COMPUTE_CLREDUCTIONMEAN_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
new file mode 100644
index 000000000..7e2df8986
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__
+#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLSpaceToBatchNDKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/S32/F32.
+ * @note The function divides the "spatial" dimensions of the input into a grid of blocks of
+ * shape block_shape, and interleaves these blocks with the "batch" dimension of the output.
+ */
+class CLSpaceToBatchND : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @note The data layout of input and output must be the same.
+ * @note The number of dimensions of input and output must be 4, and `spatial` dimensions
+ * are height and width.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
+ * Data layout supported: NCHW/NHWC
+ * @param[in] block_size Tensor of integer values specifying block sizes for spatial
+ * dimension.
+ * Data types supported: S32
+ * @param[in] padding_size Tensor of integer values specifying padding sizes for spatial
+ * dimension.
+ * Data types supported: S32
+ * @param[out] output Output tensor. Data types supported: same as @p input.
+ * Data layout supported: NCHW/NHWC
+ */
+ void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size,
+ ICLTensor *output);
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ */
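A usage sketch for CLSpaceToBatchND (illustration only, not part of the diff). The shapes and the layout of the block_size/padding_size tensors are assumptions; in practice both S32 tensors would be filled on the host (map/unmap) before run().

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h"

    using namespace arm_compute;

    void run_space_to_batch_sketch()
    {
      CLScheduler::get().default_init();

      CLTensor input, block_size, padding_size, output;
      input.allocator()->init(TensorInfo(TensorShape(4U, 4U, 8U, 1U), 1, DataType::F32));
      block_size.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::S32));        // e.g. {2, 2}
      padding_size.allocator()->init(TensorInfo(TensorShape(2U, 2U), 1, DataType::S32));  // zero padding
      output.allocator()->init(TensorInfo(TensorShape(2U, 2U, 8U, 4U), 1, DataType::F32));

      CLSpaceToBatchND s2b;
      s2b.configure(&input, &block_size, &padding_size, &output);

      input.allocator()->allocate();
      block_size.allocator()->allocate();
      padding_size.allocator()->allocate();
      output.allocator()->allocate();

      s2b.run();
      CLScheduler::get().sync();
    }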
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
new file mode 100644
index 000000000..17f762092
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__
+#define __ARM_COMPUTE_CLSPACETODEPTH_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLSpaceToDepthKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
+ * @note The function rearranges blocks of spatial data (height and width) into the depth
+ * (channel) dimension, according to the given block size.
+ */
+class CLSpaceToDepth : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size Block size. Only integer values are supported.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */
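A corresponding usage sketch (same assumptions as the CLSpaceToBatchND example above; only the
configure call differs, taking an integer block size instead of tensors):

    CLTensor input, output;
    // ... init/allocate input and output with shapes consistent with block_size == 2 ...
    CLSpaceToDepth space_to_depth;
    space_to_depth.configure(&input, &output, 2);
    space_to_depth.run();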
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h
new file mode 100644
index 000000000..3610ba71c
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__
+#define __ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
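+/** Basic function to run the squared difference operation
+ *
+ * @note For each element, the output is (input1 - input2)^2.
+ */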
+class CLSquaredDifference : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's source and destination.
+ *
+ * @param[in] input1 Source tensor1. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input2 Source tensor2. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__*/
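A usage sketch under the same assumptions as above:

    CLTensor a, b, out;
    // ... init/allocate three tensors of identical shape and data type ...
    CLSquaredDifference squared_diff;
    squared_diff.configure(&a, &b, &out);
    squared_diff.run(); // element-wise (a - b)^2 written to out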
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSlice.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSlice.h
deleted file mode 100644
index f223a79be..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSlice.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLSTRIDEDSLICE_H__
-#define __ARM_COMPUTE_CLSTRIDEDSLICE_H__
-
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLStridedSliceKernel */
-class CLStridedSlice : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] input First tensor input. Data type supported:
- * U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/F16/F32
- * @param[out] output Output tensor. Data type supported: Same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-};
-
-class CLStridedSliceCPU : public IFunction
-{
-public:
- /** Initialise inputs and outputs
- *
- * @param[in] input First tensor input.
- * @param[out] output Output tensor.
- */
- void configure(ICLTensor *input, ICLTensor *output, ICLTensor *beginData, ICLTensor *endData,
- ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-
- void run() override;
-
-private:
- void run_on_cpu();
-
- ICLTensor *_input;
- ICLTensor *_output;
- ICLTensor *_beginData;
- ICLTensor *_endData;
- ICLTensor *_stridesData;
- int32_t _beginMask;
- int32_t _endMask;
- int32_t _shrinkAxisMask;
-};
-}
-#endif /*__ARM_COMPUTE_CLSTRIDEDSLICE_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
new file mode 100644
index 000000000..6b26a85c8
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLStridedSliceEx.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLStridedSliceEx class
+ */
+
+#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
+#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to run @ref CLStridedSliceKernel
+ */
+class CLStridedSliceEx : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Initialise the kernel's inputs and outputs
+ * @param[in] input Tensor input. Data type supported:
+ * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] beginData 'begin' vector of strided slice operation
+ * @param[in] endData 'end' vector of strided slice operation
+ * @param[in] stridesData 'strides' vector of strided slice operation
+ * @param[in] beginMask If the ith bit is set, begin[i] is ignored
+ * @param[in] endMask If the ith bit is set, end[i] is ignored
+ * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the
+ * dimensionality by 1, taking on the value at index begin[i]
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
+ ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
+ int32_t shrinkAxisMask);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */
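A usage sketch under the same assumptions (begin/end/strides are assumed here to be 1-D S32
tensors with one element per input dimension, mirroring the usual StridedSlice inputs; the
header itself does not state their data type):

    CLTensor input, output, begin, end, strides;
    // ... init/allocate tensors ...
    CLStridedSliceEx strided_slice;
    strided_slice.configure(&input, &output, &begin, &end, &strides,
                            0 /* beginMask */, 0 /* endMask */, 0 /* shrinkAxisMask */);
    strided_slice.run();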
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
index 06cd1ee9b..5327e016f 100644
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
@@ -14,51 +14,79 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/**
+ * @file CLTopKV2.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLTopKV2 class
+ */
#ifndef __ARM_COMPUTE_CLTOPK_V2_H__
#define __ARM_COMPUTE_CLTOPK_V2_H__
#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h"
-#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
class ICLTensor;
-/** Basic function to execute TopK operation. This function calls the following OpenCL kernels:
- *
- * -# @ref CLTopKV2Kernel
+/**
+ * @brief Class to execute TopKV2 operation.
*/
class CLTopKV2 : public IFunction
{
public:
- /** Constructor */
+ /**
+ * @brief Construct a new CLTopKV2 object
+ */
CLTopKV2();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
CLTopKV2(const CLTopKV2 &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
CLTopKV2 &operator=(const CLTopKV2 &) = delete;
- /** Allow instances of this class to be moved */
+
+ /**
+   * @brief Construct a new CLTopKV2 object by using the move constructor
+ * @param[in] CLTopKV2 object to move
+ */
CLTopKV2(CLTopKV2 &&) = default;
- /** Allow instances of this class to be moved */
+
+ /**
+   * @brief Move-assign a CLTopKV2 object.
+ * @param[in] CLTopKV2 object to assign. This object will be moved.
+ */
CLTopKV2 &operator=(CLTopKV2 &&) = default;
- /** Initialise the kernel's inputs and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations
- * is limited to the given array size.
- *
+
+ /**
+ * @brief Initialise the kernel's inputs and outputs.
* @param[in] input Input image. Data types supported: U8/S16/F32.
* @param[in] k The value of `k`.
* @param[out] values Top k values. Data types supported: S32 if input type is U8/S16, F32 if
* input type is F32.
- * @param[out] indices indices related to top k values. Data types supported: S32 if input type
+ * @param[out] indices Indices related to top k values. Data types supported: S32 if input type
* is U8/S16, F32 if input type is F32.
+ * @return N/A
*/
void configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices,
int total_bits = 32, int bits = 4);
- // Inherited methods overridden:
+ /**
+ * @brief Run the kernels contained in the function
+   * Depending on the value of the environment variable "ACL_TOPKV2", it works differently:
+   * - If "ACL_TOPKV2" == "GPU_SINGLE", quick sort on GPU is used.
+   * - If "ACL_TOPKV2" == "GPU", radix sort on GPU is used.
+   * - For any other value, TopKV2 runs on the CPU.
+ * @return N/A
+ */
void run() override;
private: