summaryrefslogtreecommitdiff
path: root/libs/ARMComputeEx/arm_compute
diff options
context:
space:
mode:
Diffstat (limited to 'libs/ARMComputeEx/arm_compute')
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h189
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h57
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h71
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h87
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceMaxKernel.h73
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReductionMeanKernel.h78
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceKernel.h106
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h301
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h45
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h49
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h72
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceMax.h81
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReductionMean.h73
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSlice.h69
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h109
15 files changed, 1460 insertions, 0 deletions
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
new file mode 100644
index 000000000..026487077
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__
+#define __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+
+namespace arm_compute
+{
+
+/** CLKernelLibraryEx class: singleton that creates CL kernels and caches built programs */
+class CLKernelLibraryEx
+{
+ using StringSet = std::set<std::string>;
+
+private:
+ /** Default Constructor. */
+ CLKernelLibraryEx();
+
+public:
+ /** Prevent instances of this class from being copied */
+ CLKernelLibraryEx(const CLKernelLibraryEx &) = delete;
+ /** Prevent instances of this class from being copy-assigned */
+ const CLKernelLibraryEx &operator=(const CLKernelLibraryEx &) = delete;
+ /** Access the KernelLibrary singleton.
+ * @return The KernelLibrary instance.
+ */
+ static CLKernelLibraryEx &get();
+ /** Initialises the kernel library.
+ *
+ * @param[in] kernel_path (Optional) Path of the directory from which kernel sources are loaded.
+ * @param[in] context (Optional) CL context used to create programs.
+ * @param[in] device (Optional) CL device for which the programs are created.
+ */
+ void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(),
+ cl::Device device = cl::Device::getDefault())
+ {
+ _kernel_path = std::move(kernel_path);
+ _context = std::move(context);
+ _device = std::move(device);
+ }
+ /** Sets the path that the kernels reside in.
+ *
+ * @param[in] kernel_path Path of the directory containing the kernels.
+ */
+ void set_kernel_path(const std::string &kernel_path) { _kernel_path = kernel_path; };
+ /** Gets the path that the kernels reside in (returns a copy of the stored path).
+ */
+ std::string get_kernel_path() { return _kernel_path; };
+ /** Gets the source of the selected program.
+ *
+ * @param[in] program_name Program name.
+ *
+ * @return Source of the selected program.
+ */
+ std::string get_program_source(const std::string &program_name);
+ /** Sets the CL context used to create programs.
+ *
+ * @note Setting the context also resets the device to the first one available in the new
+ * context, or to a default-constructed cl::Device if the context is null or has no devices.
+ *
+ * @param[in] context A CL context.
+ */
+ void set_context(cl::Context context)
+ {
+ _context = std::move(context);
+ if (_context.get() == nullptr)
+ {
+ _device = cl::Device();
+ }
+ else
+ {
+ const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
+
+ if (cl_devices.empty())
+ {
+ _device = cl::Device();
+ }
+ else
+ {
+ _device = cl_devices[0];
+ }
+ }
+ }
+
+ /** Accessor for the associated CL context.
+ *
+ * @return A CL context.
+ */
+ cl::Context &context() { return _context; }
+
+ /** Sets the CL device for which the programs are created.
+ *
+ * @param[in] device A CL device.
+ */
+ void set_device(cl::Device device) { _device = std::move(device); }
+
+ /** Return the device version
+ *
+ * @return The content of CL_DEVICE_VERSION
+ */
+ std::string get_device_version();
+ /** Creates a kernel from the kernel library.
+ *
+ * @param[in] kernel_name Kernel name.
+ * @param[in] build_options_set Kernel build options as a set.
+ *
+ * @return The created kernel.
+ */
+ Kernel create_kernel(const std::string &kernel_name,
+ const StringSet &build_options_set = {}) const;
+ /** Find the maximum number of local work items in a workgroup that the kernel can support.
+ * @param[in] kernel Kernel for which the limit is computed.
+ */
+ size_t max_local_workgroup_size(const cl::Kernel &kernel) const;
+ /** Return the default NDRange for the device.
+ *
+ */
+ cl::NDRange default_ndrange() const;
+
+ /** Clear the library's caches of loaded and built programs
+ */
+ void clear_programs_cache()
+ {
+ _programs_map.clear();
+ _built_programs_map.clear();
+ }
+
+ /** Access the cache of built OpenCL programs */
+ const std::map<std::string, cl::Program> &get_built_programs() const
+ {
+ return _built_programs_map;
+ }
+
+ /** Add a new built program to the cache
+ *
+ * @param[in] built_program_name Name of the program
+ * @param[in] program Built program to add to the cache
+ */
+ void add_built_program(const std::string &built_program_name, cl::Program program);
+
+private:
+ /** Load program and its dependencies.
+ *
+ * @param[in] program_name Name of the program to load.
+ */
+ const Program &load_program(const std::string &program_name) const;
+ /** Concatenates contents of a set into a single string.
+ *
+ * @param[in] s Input set to concatenate.
+ *
+ * @return Concatenated string.
+ */
+ std::string stringify_set(const StringSet &s) const;
+
+ cl::Context _context; /**< Underlying CL context. */
+ cl::Device _device; /**< Underlying CL device. */
+ std::string _kernel_path; /**< Path to the kernels folder. */
+ mutable std::map<std::string, const Program>
+ _programs_map; /**< Map with all already loaded program data. */
+ mutable std::map<std::string, cl::Program>
+ _built_programs_map; /**< Map with all already built program data. */
+ static const std::map<std::string, std::string>
+ _kernel_program_map; /**< Map that associates kernel names with programs. */
+ static const std::map<std::string, std::string>
+ _program_source_map; /**< Contains sources for all programs.
+ Used for compile-time kernel inclusion. */
+};
+}
+#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
new file mode 100644
index 000000000..6bd33bf8f
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__
+#define __ARM_COMPUTE_CLCASTKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a cast operation */
+class CLCastKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLCastKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCastKernel(const CLCastKernel &) = delete;
+ /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+ CLCastKernel &operator=(const CLCastKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLCastKernel(CLCastKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLCastKernel &operator=(CLCastKernel &&) = default;
+ /** Default destructor */
+ ~CLCastKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h
new file mode 100644
index 000000000..a51441aca
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLGATHERKERNEL_H__
+#define __ARM_COMPUTE_CLGATHERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the gather kernel.
+ *
+ */
+class CLGatherKernel : public ICLKernel
+{
+public:
+ /** Default constructor.*/
+ CLGatherKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLGatherKernel(const CLGatherKernel &) = delete;
+ /** Prevent instances of this class from being copy-assigned (As this class contains pointers). */
+ CLGatherKernel &operator=(const CLGatherKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGatherKernel(CLGatherKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLGatherKernel &operator=(CLGatherKernel &&) = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input1 An input tensor. Data types supported: U8/S32/F32.
+ * @param[in] input2 An input tensor. Data types supported: S32.
+ * @param[out] output The output tensor. Data types supported: same as @p input1.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLGatherKernel
+ *
+ * @param[in] input1 An input tensor info. Data types supported: U8/S32/F32.
+ * @param[in] input2 An input tensor info. Data types supported: S32.
+ * @param[in] output The output tensor info. Data types supported: same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLGATHERKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h
new file mode 100644
index 000000000..cd2b255bc
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__
+#define __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the pixelwise division kernel.
+ *
+ */
+class CLPixelWiseDivisionKernel : public ICLKernel
+{
+public:
+ /** Default constructor.*/
+ CLPixelWiseDivisionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLPixelWiseDivisionKernel(const CLPixelWiseDivisionKernel &) = delete;
+ /** Prevent instances of this class from being copy-assigned (As this class contains pointers). */
+ CLPixelWiseDivisionKernel &operator=(const CLPixelWiseDivisionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPixelWiseDivisionKernel(CLPixelWiseDivisionKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLPixelWiseDivisionKernel &operator=(CLPixelWiseDivisionKernel &&) = default;
+ /** Initialise the kernel's inputs, output, scale and overflow/rounding policies.
+ *
+ * @param[in] input1 An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input2 An input tensor. Data types supported: same as @p input1.
+ * @param[out] output The output tensor. Data types supported: same as @p input1. Note:
+ * U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+ * @param[in] scale Scale to apply after division.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n
+ * where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
+ * even.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
+ ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLPixelWiseDivisionKernel
+ *
+ * @param[in] input1 An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
+ * @param[in] output The output tensor info. Data types supported: same as @p input1.
+ * Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+ * @param[in] scale Scale to apply after division.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n
+ * where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output, float scale, ConvertPolicy overflow_policy,
+ RoundingPolicy rounding_policy);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceMaxKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceMaxKernel.h
new file mode 100644
index 000000000..a7d96cc5c
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceMaxKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLREDUCEMAXKERNEL_H__
+#define __ARM_COMPUTE_CLREDUCEMAXKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the reduce max kernel.
+ *
+ */
+class CLReduceMaxKernel : public ICLKernel
+{
+public:
+ /** Default constructor.*/
+ CLReduceMaxKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLReduceMaxKernel(const CLReduceMaxKernel &) = delete;
+ /** Prevent instances of this class from being copy-assigned (As this class contains pointers). */
+ CLReduceMaxKernel &operator=(const CLReduceMaxKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLReduceMaxKernel(CLReduceMaxKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLReduceMaxKernel &operator=(CLReduceMaxKernel &&) = default;
+ /** Initialise the kernel's input, reduction axis and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] axis Axis to reduce
+ * @param[out] output The output tensor. Data types supported: same as @p input.
+ * NOTE(review): earlier wording mentioned "both inputs"; only one input exists.
+ */
+ void configure(const ICLTensor *input, int32_t axis, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLReduceMaxKernel
+ *
+ * @param[in] input An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32.
+ * @param[in] axis Axis to reduce
+ * @param[in] output The output tensor info. Data types supported: same as @p input.
+ * NOTE(review): earlier wording mentioned "both inputs"; only one input exists.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, int32_t axis, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ void run_on_cpu(cl::CommandQueue &queue); // Not marked override. NOTE(review): purpose not shown here
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ int32_t _axis;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLREDUCEMAXKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReductionMeanKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReductionMeanKernel.h
new file mode 100644
index 000000000..de9df3381
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReductionMeanKernel.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__
+#define __ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the reduction mean kernel */
+class CLReductionMeanKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLReductionMeanKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionMeanKernel(const CLReductionMeanKernel &) = delete;
+ /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+ CLReductionMeanKernel &operator=(const CLReductionMeanKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLReductionMeanKernel(CLReductionMeanKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLReductionMeanKernel &operator=(CLReductionMeanKernel &&) = default;
+ /** Default destructor */
+ ~CLReductionMeanKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axes along which to reduce (passed as a vector). Supported reduction axis : 0, 1
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLReductionMeanKernel.
+ *
+ * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
+ * input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axes along which to reduce (passed as a vector). Supported reduction axis : 0, 1
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ std::vector<uint32_t> axis);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ std::vector<uint32_t> _reduction_axis; // NOTE(review): <vector> is not included directly here
+ BorderSize _border_size;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
new file mode 100644
index 000000000..248ae6635
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceKernel.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEKERNEL_H__
+#define __ARM_COMPUTE_CLSTRIDEDSLICEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to extract a strided slice of a tensor */
+class CLStridedSliceKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLStridedSliceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLStridedSliceKernel(const CLStridedSliceKernel &) = delete;
+ /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+ CLStridedSliceKernel &operator=(const CLStridedSliceKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLStridedSliceKernel(CLStridedSliceKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLStridedSliceKernel &operator=(CLStridedSliceKernel &&) = default;
+ /** Default destructor */
+ ~CLStridedSliceKernel() = default;
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported:
+ * U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] beginData The begin tensor. Data types supported: S32.
+ * The number of dimensions must be 1.
+ * The length must be the same as the number of dimensions of input.
+ * @param[in] endData The end tensor. Data types supported: S32.
+ * The number of dimensions must be 1.
+ * The length must be the same as the number of dimensions of input.
+ * @param[in] stridesData The stride tensor. Data types supported: S32.
+ * The number of dimensions must be 1.
+ * The length must be the same as the number of dimensions of input.
+ * @param[in] beginMask Mask for begin
+ * @param[in] endMask Mask for end
+ * @param[in] shrinkAxisMask Mask for shrink axis.
+ *
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
+ ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
+ int32_t shrinkAxisMask);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLStridedSliceKernel
+ *
+ * @param[in] input The input tensor info. Data types supported:
+ * U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/F16/F32
+ * @param[in] output The output tensor info. Data types supported: same as @p input.
+ * @param[in] begin The begin tensor info. Data types supported: S32.
+ * The number of dimensions must be 1.
+ * The length must be the same as the number of dimensions of input.
+ * @param[in] end The end tensor info. Data types supported: S32.
+ * The number of dimensions must be 1.
+ * The length must be the same as the number of dimensions of input.
+ * @param[in] stride The stride tensor info. Data types supported: S32.
+ * The number of dimensions must be 1.
+ * The length must be the same as the number of dimensions of input.
+ * @param[in] beginMask Mask for begin
+ * @param[in] endMask Mask for end
+ * @param[in] shrinkAxisMask Mask for shrink axis.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *begin, const ITensorInfo *end,
+ const ITensorInfo *stride, int32_t beginMask, int32_t endMask,
+ int32_t shrinkAxisMask);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ ICLTensor *_beginData; /**< Start indices of input tensor */
+ ICLTensor *_endData; /**< Stop indices of input tensor */
+ ICLTensor *_stridesData; /**< Strides tensor */
+ int32_t _beginMask; /**< Begin mask */
+ int32_t _endMask; /**< End mask */
+ int32_t _shrinkAxisMask; /**< Shrink axis mask */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
new file mode 100644
index 000000000..5c567f38e
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__
+#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <array>
+
+// Tunable parameters (these can be changed). NOTE(review): names that start with an underscore followed by an uppercase letter are reserved identifiers in C++; a rename would have to cover every user of these macros.
+#define _ITEMS 16 // number of items in a group
+#define _GROUPS 4 // the number of virtual processors is _ITEMS * _GROUPS
+#define _HISTOSPLIT (_ITEMS * _GROUPS / 2) // number of splits of the histogram (32 with the values above)
+#define PERMUT // store the final permutation (defined with an empty replacement list)
+// ---- end of tunable parameters ----
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Kernel declaration holding raw pointers to an input tensor and two top-k output tensors.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLTopKV2Single : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLTopKV2Single();
+
+  /** Move construction is permitted. */
+  CLTopKV2Single(CLTopKV2Single &&) = default;
+  /** Move assignment is permitted. */
+  CLTopKV2Single &operator=(CLTopKV2Single &&) = default;
+  /** Copy construction is disabled (the class contains pointers). */
+  CLTopKV2Single(const CLTopKV2Single &) = delete;
+  /** Copy assignment is disabled (the class contains pointers). */
+  CLTopKV2Single &operator=(const CLTopKV2Single &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in]  input        Input tensor.
+   * @param[out] topk_values  Tensor for the top-k values.
+   * @param[out] topk_indices Tensor for the top-k indices.
+   * @param[in]  indices      OpenCL buffer (presumably scratch indices - confirm).
+   * @param[in]  temp_stack   OpenCL buffer (presumably a temporary stack - confirm).
+   * @param[in]  k            The value of `k`.
+   * @param[in]  n            Presumably the number of input elements - confirm.
+   */
+  void configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices,
+                 cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n);
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+  ICLTensor *_input;        /**< Input tensor pointer */
+  ICLTensor *_topk_values;  /**< Top-k values tensor pointer */
+  ICLTensor *_topk_indices; /**< Top-k indices tensor pointer */
+};
+
+/** Kernel declaration holding a raw pointer to an input tensor.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLTopKV2Init : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLTopKV2Init();
+
+  /** Move construction is permitted. */
+  CLTopKV2Init(CLTopKV2Init &&) = default;
+  /** Move assignment is permitted. */
+  CLTopKV2Init &operator=(CLTopKV2Init &&) = default;
+  /** Copy construction is disabled (the class contains pointers). */
+  CLTopKV2Init(const CLTopKV2Init &) = delete;
+  /** Copy assignment is disabled (the class contains pointers). */
+  CLTopKV2Init &operator=(const CLTopKV2Init &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in] input      Input tensor.
+   * @param[in] in_key_buf OpenCL buffer for keys.
+   * @param[in] in_ind_buf OpenCL buffer for indices.
+   * @param[in] n          Presumably the number of elements - confirm.
+   */
+  void configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, int n);
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+  ICLTensor *_input; /**< Input tensor pointer */
+};
+
+/** Kernel declaration that keeps a pass number and a pointer to an input key buffer.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLRadixSortHistogram : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLRadixSortHistogram();
+
+  /** Move construction is permitted. */
+  CLRadixSortHistogram(CLRadixSortHistogram &&) = default;
+  /** Move assignment is permitted. */
+  CLRadixSortHistogram &operator=(CLRadixSortHistogram &&) = default;
+  /** Copy construction is disabled (the class contains pointers). */
+  CLRadixSortHistogram(const CLRadixSortHistogram &) = delete;
+  /** Copy assignment is disabled (the class contains pointers). */
+  CLRadixSortHistogram &operator=(const CLRadixSortHistogram &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in] hist_buf OpenCL buffer for the histogram.
+   * @param[in] bits     Presumably the number of radix bits per pass - confirm.
+   * @param[in] n        Presumably the number of elements - confirm.
+   */
+  void configure(cl::Buffer *hist_buf, int bits, int n);
+
+  /** Record the pass number and the input key buffer in the corresponding members.
+   *
+   * @param[in] pass       Value stored in _pass.
+   * @param[in] in_key_buf Pointer stored in _in_key_buf.
+   */
+  void setPass(int pass, cl::Buffer *in_key_buf)
+  {
+    _in_key_buf = in_key_buf;
+    _pass = pass;
+  }
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+  int _pass;               /**< Pass number set by setPass() */
+  cl::Buffer *_in_key_buf; /**< Input key buffer set by setPass() */
+};
+
+/** Kernel declaration configured with a histogram buffer and a global-sum buffer.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLRadixSortScanHistogram : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLRadixSortScanHistogram();
+
+  /** Move construction is permitted. */
+  CLRadixSortScanHistogram(CLRadixSortScanHistogram &&) = default;
+  /** Move assignment is permitted. */
+  CLRadixSortScanHistogram &operator=(CLRadixSortScanHistogram &&) = default;
+  /** Copy construction is disabled. */
+  CLRadixSortScanHistogram(const CLRadixSortScanHistogram &) = delete;
+  /** Copy assignment is disabled. */
+  CLRadixSortScanHistogram &operator=(const CLRadixSortScanHistogram &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in] hist_buf     OpenCL buffer for the histogram.
+   * @param[in] glob_sum_buf OpenCL buffer for the global sums.
+   * @param[in] bits         Presumably the number of radix bits per pass - confirm.
+   */
+  void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Kernel declaration configured with a global-sum buffer and a temporary buffer.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLRadixSortGlobalScanHistogram : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLRadixSortGlobalScanHistogram();
+
+  /** Move construction is permitted. */
+  CLRadixSortGlobalScanHistogram(CLRadixSortGlobalScanHistogram &&) = default;
+  /** Move assignment is permitted. */
+  CLRadixSortGlobalScanHistogram &operator=(CLRadixSortGlobalScanHistogram &&) = default;
+  /** Copy construction is disabled. */
+  CLRadixSortGlobalScanHistogram(const CLRadixSortGlobalScanHistogram &) = delete;
+  /** Copy assignment is disabled. */
+  CLRadixSortGlobalScanHistogram &operator=(const CLRadixSortGlobalScanHistogram &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in] glob_sum_buf OpenCL buffer for the global sums.
+   * @param[in] temp_buf     OpenCL buffer used as temporary storage.
+   * @param[in] bits         Presumably the number of radix bits per pass - confirm.
+   */
+  void configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, int bits);
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Kernel declaration configured with a histogram buffer and a global-sum buffer.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLRadixSortPasteHistogram : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLRadixSortPasteHistogram();
+
+  /** Move construction is permitted. */
+  CLRadixSortPasteHistogram(CLRadixSortPasteHistogram &&) = default;
+  /** Move assignment is permitted. */
+  CLRadixSortPasteHistogram &operator=(CLRadixSortPasteHistogram &&) = default;
+  /** Copy construction is disabled. */
+  CLRadixSortPasteHistogram(const CLRadixSortPasteHistogram &) = delete;
+  /** Copy assignment is disabled. */
+  CLRadixSortPasteHistogram &operator=(const CLRadixSortPasteHistogram &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in] hist_buf     OpenCL buffer for the histogram.
+   * @param[in] glob_sum_buf OpenCL buffer for the global sums.
+   * @param[in] bits         Presumably the number of radix bits per pass - confirm.
+   */
+  void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Kernel declaration that keeps a pass number plus input/output key and index buffer pointers.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLRadixSortReorder : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLRadixSortReorder();
+
+  /** Move construction is permitted. */
+  CLRadixSortReorder(CLRadixSortReorder &&) = default;
+  /** Move assignment is permitted. */
+  CLRadixSortReorder &operator=(CLRadixSortReorder &&) = default;
+  /** Copy construction is disabled (the class contains pointers). */
+  CLRadixSortReorder(const CLRadixSortReorder &) = delete;
+  /** Copy assignment is disabled (the class contains pointers). */
+  CLRadixSortReorder &operator=(const CLRadixSortReorder &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in] hist_buf OpenCL buffer for the histogram.
+   * @param[in] bits     Presumably the number of radix bits per pass - confirm.
+   * @param[in] n        Presumably the number of elements - confirm.
+   */
+  void configure(cl::Buffer *hist_buf, int bits, int n);
+
+  /** Record the pass number and the four buffer pointers in the corresponding members.
+   *
+   * @param[in] pass        Value stored in _pass.
+   * @param[in] in_key_buf  Pointer stored in _in_key_buf.
+   * @param[in] out_key_buf Pointer stored in _out_key_buf.
+   * @param[in] in_ind_buf  Pointer stored in _in_ind_buf.
+   * @param[in] out_ind_buf Pointer stored in _out_ind_buf.
+   */
+  void setPass(int pass, cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
+               cl::Buffer *out_ind_buf)
+  {
+    // Buffers first, pass number last; the assignments are independent of each other.
+    _in_key_buf = in_key_buf;
+    _in_ind_buf = in_ind_buf;
+    _out_key_buf = out_key_buf;
+    _out_ind_buf = out_ind_buf;
+    _pass = pass;
+  }
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+  int _pass;                /**< Pass number set by setPass() */
+  cl::Buffer *_in_key_buf;  /**< Input key buffer set by setPass() */
+  cl::Buffer *_out_key_buf; /**< Output key buffer set by setPass() */
+  cl::Buffer *_in_ind_buf;  /**< Input index buffer set by setPass() */
+  cl::Buffer *_out_ind_buf; /**< Output index buffer set by setPass() */
+};
+
+/** Kernel declaration that keeps a pointer to an output key buffer.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLTopKV2FindFirstNegative : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLTopKV2FindFirstNegative();
+
+  /** Move construction is permitted. */
+  CLTopKV2FindFirstNegative(CLTopKV2FindFirstNegative &&) = default;
+  /** Move assignment is permitted. */
+  CLTopKV2FindFirstNegative &operator=(CLTopKV2FindFirstNegative &&) = default;
+  /** Copy construction is disabled (the class contains pointers). */
+  CLTopKV2FindFirstNegative(const CLTopKV2FindFirstNegative &) = delete;
+  /** Copy assignment is disabled (the class contains pointers). */
+  CLTopKV2FindFirstNegative &operator=(const CLTopKV2FindFirstNegative &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in] first_negative_idx_buf OpenCL buffer for the index of the first negative value.
+   * @param[in] n                      Presumably the number of elements - confirm.
+   */
+  void configure(cl::Buffer *first_negative_idx_buf, int n);
+
+  /** Store the given buffer pointer in _out_key_buf.
+   *
+   * @param[in] out_key_buf Pointer stored in _out_key_buf.
+   */
+  void setOutputBuffer(cl::Buffer *out_key_buf)
+  {
+    _out_key_buf = out_key_buf;
+  }
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+  cl::Buffer *_out_key_buf; /**< Buffer pointer set by setOutputBuffer() */
+};
+
+/** Kernel declaration that keeps input/output key and index buffer pointers.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLTopKV2ReorderNegatives : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLTopKV2ReorderNegatives();
+
+  /** Move construction is permitted. */
+  CLTopKV2ReorderNegatives(CLTopKV2ReorderNegatives &&) = default;
+  /** Move assignment is permitted. */
+  CLTopKV2ReorderNegatives &operator=(CLTopKV2ReorderNegatives &&) = default;
+  /** Copy construction is disabled (the class contains pointers). */
+  CLTopKV2ReorderNegatives(const CLTopKV2ReorderNegatives &) = delete;
+  /** Copy assignment is disabled (the class contains pointers). */
+  CLTopKV2ReorderNegatives &operator=(const CLTopKV2ReorderNegatives &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[in] first_negative_idx_buf OpenCL buffer for the index of the first negative value.
+   * @param[in] n                      Presumably the number of elements - confirm.
+   */
+  void configure(cl::Buffer *first_negative_idx_buf, int n);
+
+  /** Record the four buffer pointers in the corresponding members.
+   *
+   * @param[in] in_key_buf  Pointer stored in _in_key_buf.
+   * @param[in] out_key_buf Pointer stored in _out_key_buf.
+   * @param[in] in_ind_buf  Pointer stored in _in_ind_buf.
+   * @param[in] out_ind_buf Pointer stored in _out_ind_buf.
+   */
+  void setBuffers(cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
+                  cl::Buffer *out_ind_buf)
+  {
+    // Inputs first, then outputs; the assignments are independent of each other.
+    _in_key_buf = in_key_buf;
+    _in_ind_buf = in_ind_buf;
+    _out_key_buf = out_key_buf;
+    _out_ind_buf = out_ind_buf;
+  }
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+  cl::Buffer *_in_key_buf;  /**< Input key buffer set by setBuffers() */
+  cl::Buffer *_out_key_buf; /**< Output key buffer set by setBuffers() */
+  cl::Buffer *_in_ind_buf;  /**< Input index buffer set by setBuffers() */
+  cl::Buffer *_out_ind_buf; /**< Output index buffer set by setBuffers() */
+};
+
+/** Kernel declaration that keeps two tensor pointers and two output buffer pointers.
+ *
+ * NOTE(review): parameter roles documented below are read off the names only; confirm them
+ * against the implementation.
+ */
+class CLTopKV2Store : public ICLKernel
+{
+public:
+  /** Default constructor. */
+  CLTopKV2Store();
+
+  /** Move construction is permitted. */
+  CLTopKV2Store(CLTopKV2Store &&) = default;
+  /** Move assignment is permitted. */
+  CLTopKV2Store &operator=(CLTopKV2Store &&) = default;
+  /** Copy construction is disabled (the class contains pointers). */
+  CLTopKV2Store(const CLTopKV2Store &) = delete;
+  /** Copy assignment is disabled (the class contains pointers). */
+  CLTopKV2Store &operator=(const CLTopKV2Store &) = delete;
+
+  /** Set up the kernel.
+   *
+   * @param[out] values  Tensor for the top-k values.
+   * @param[out] indices Tensor for the top-k indices.
+   * @param[in]  k       The value of `k`.
+   * @param[in]  n       Presumably the number of elements - confirm.
+   */
+  void configure(ICLTensor *values, ICLTensor *indices, int k, int n);
+
+  /** Provide the key and index buffers (defined out of line).
+   *
+   * @param[in] out_key_buf OpenCL buffer of keys.
+   * @param[in] out_ind_buf OpenCL buffer of indices.
+   */
+  void setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf);
+
+  /** Overrides ICLKernel::run. */
+  void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+  ICLTensor *_values;       /**< Values tensor pointer */
+  ICLTensor *_indices;      /**< Indices tensor pointer */
+  cl::Buffer *_out_key_buf; /**< Key buffer pointer */
+  cl::Buffer *_out_ind_buf; /**< Index buffer pointer */
+};
+
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_CLTOPKV2KERNEL_H__
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
new file mode 100644
index 000000000..63050067d
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLCAST_H__
+#define __ARM_COMPUTE_CLCAST_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLCastKernel.
+ *
+ * @note Input tensors must have one of the data types U8/QASYMM8/S16/S32/F16/F32.
+ * @note The input tensor is converted to the data type of the output tensor.
+ */
+class CLCast : public ICLSimpleFunction
+{
+public:
+  /** Set the input and output tensors of the function.
+   *
+   * @param[in, out] input  Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+   *                        Marked [in, out] because its TensorInfo might be modified inside
+   *                        the kernel.
+   * @param[out]     output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+   */
+  void configure(ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLCAST_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h
new file mode 100644
index 000000000..3ae7afe14
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLGATHER_H__
+#define __ARM_COMPUTE_CLGATHER_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLGatherKernel. */
+class CLGather : public ICLSimpleFunction
+{
+public:
+  /** Initialise the kernel's inputs and output.
+   *
+   * @param[in]  input1 An input tensor. Data types supported: U8/S32/F32.
+   * @param[in]  input2 An indexes tensor. Data types supported: S32.
+   * @param[out] output The output tensor, Data types supported: same as @p input1.
+   */
+  void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
+  /** Static function to check if given info will lead to a valid configuration of @ref CLGather
+   *
+   * All three arguments are read-only tensor infos.
+   *
+   * @param[in] input1 An input tensor info. Data types supported: U8/S32/F32.
+   * @param[in] input2 An indexes tensor info. Data types supported: S32.
+   * @param[in] output The output tensor info, Data types supported: same as @p input1.
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+                         const ITensorInfo *output);
+};
+}
+#endif /*__ARM_COMPUTE_CLGATHER_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
new file mode 100644
index 000000000..c1383e21f
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
+#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLPixelWiseDivisionKernel. */
+class CLPixelWiseDivision : public ICLSimpleFunction
+{
+public:
+  /** Initialise the kernel's inputs, output and conversion policy.
+   *
+   * @param[in, out] input1          An input tensor. Data types supported: U8/QS8/QS16/S16/F16/F32.
+   *                                 The input tensor is [in, out] because its TensorInfo might be
+   *                                 modified inside the kernel in case of broadcasting of dimension 0.
+   * @param[in, out] input2          An input tensor. Data types supported: same as @p input1.
+   *                                 The input tensor is [in, out] because its TensorInfo might be
+   *                                 modified inside the kernel in case of broadcasting of dimension 0.
+   * @param[out]     output          The output tensor, Data types supported: same as @p input1.
+   *                                 Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+   * @param[in]      scale           Scale to apply to the result. Defaults to 1.
+   *                                 Scale must be positive and its value must be either 1/255 or
+   *                                 1/2^n where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+   *                                 NOTE(review): these constraints were worded for a
+   *                                 multiplication function - confirm they hold for division.
+   * @param[in]      overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate.
+   *                                 Defaults to ConvertPolicy::WRAP.
+   * @param[in]      rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
+   *                                 even. Defaults to RoundingPolicy::TO_ZERO.
+   */
+  void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f,
+                 ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
+                 RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * CLPixelWiseDivision
+   *
+   * @param[in] input1          An input tensor info. Data types supported: U8/QS8/QS16/S16/F16/F32.
+   * @param[in] input2          An input tensor info. Data types supported: same as @p input1.
+   * @param[in] output          The output tensor info, Data types supported: same as @p input1.
+   *                            Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
+   * @param[in] scale           Scale to apply to the result. Defaults to 1.
+   *                            Scale must be positive and its value must be either 1/255 or 1/2^n
+   *                            where n is between 0 and 15. For QS8 and QS16 scale must be 1.
+   *                            NOTE(review): these constraints were worded for a multiplication
+   *                            function - confirm they hold for division.
+   * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate.
+   *                            Defaults to ConvertPolicy::WRAP.
+   * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+   *                            Defaults to RoundingPolicy::TO_ZERO.
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+                         const ITensorInfo *output, float scale = 1.f,
+                         ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
+                         RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
+};
+}
+#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceMax.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceMax.h
new file mode 100644
index 000000000..14b473f33
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceMax.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLREDUCE_MAX_H__
+#define __ARM_COMPUTE_CLREDUCE_MAX_H__
+
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a reduce-max operation along one axis.
+ *
+ * The function keeps a single @ref ICLKernel through a std::unique_ptr and also declares a
+ * private run_on_cpu() helper. NOTE(review): which of the two run() relies on is not visible in
+ * this header - confirm against the implementation.
+ */
+class CLReduceMax : public IFunction
+{
+public:
+  /** Constructor */
+  CLReduceMax();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLReduceMax(const CLReduceMax &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLReduceMax &operator=(const CLReduceMax &) = delete;
+  /** Allow instances of this class to be moved */
+  CLReduceMax(CLReduceMax &&) = default;
+  /** Allow instances of this class to be moved */
+  CLReduceMax &operator=(CLReduceMax &&) = default;
+  /** Initialise the function's input, reduction axis and output.
+   *
+   * @param[in]  input  Input tensor. Data types supported: F32
+   * @param[in]  axis   Axis to reduce, passed as a 32-bit signed integer.
+   * @param[out] output Output tensor. Data types supported: F32.
+   */
+  void configure(ICLTensor *input, int32_t axis, ICLTensor *output);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * CLReduceMax
+   *
+   * @param[in] input  Input tensor info. Data types supported: F32
+   * @param[in] axis   Axis to reduce, passed as a 32-bit signed integer.
+   * @param[in] output Output tensor info. Data types supported: F32.
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, int32_t axis, const ITensorInfo *output);
+
+  // Inherited methods overridden:
+  void run() override;
+
+private:
+  /** CPU-side helper (defined out of line). */
+  void run_on_cpu();
+
+  int32_t _axis; /**< Reduction axis */
+
+  ICLTensor *_input;  /**< Input tensor pointer */
+  ICLTensor *_output; /**< Output tensor pointer */
+
+  std::unique_ptr<ICLKernel> _kernel; /**< Kernel owned by this function */
+};
+}
+#endif /*__ARM_COMPUTE_CLREDUCE_MAX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReductionMean.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReductionMean.h
new file mode 100644
index 000000000..2081518c1
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReductionMean.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARM_COMPUTE_CLREDUCTIONMEAN_H__
+#define __ARM_COMPUTE_CLREDUCTIONMEAN_H__
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLReductionMeanKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Function performing a mean reduction.
+ *
+ * Holds one @ref CLReductionMeanKernel and one @ref CLFillBorderKernel as members.
+ */
+class CLReductionMean : public IFunction
+{
+public:
+  /** Default constructor. */
+  CLReductionMean();
+
+  /** Configure the function with its input tensor, output tensor and reduction axes.
+   *
+   * @param[in]  input  Source tensor. Data types supported: F32. Data layouts supported: NCHW.
+   * @param[out] output Destination tensor. Data types and data layouts supported: Same as
+   *                    @p input.
+   * @param[in]  axis   Axis along which to reduce. Supported reduction axis : 0,1
+   */
+  void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis);
+
+  /** Check whether the given tensor infos would form a valid configuration of
+   * @ref CLReductionMean.
+   *
+   * @param[in] input  Source tensor info. Data types supported: F32. Data layouts supported:
+   *                   NCHW.
+   * @param[in] output Destination tensor info. Data types and data layouts supported: Same as
+   *                   @p input.
+   * @param[in] axis   Axis along which to reduce. Supported reduction axis : 0,1
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+                         std::vector<uint32_t> axis);
+
+  /** Overrides IFunction::run. */
+  void run() override;
+
+private:
+  CLReductionMeanKernel _reduction_mean_kernel; /**< Reduction kernel */
+  CLFillBorderKernel _fill_border_kernel;       /**< Border-fill kernel */
+};
+}
+#endif /*__ARM_COMPUTE_CLREDUCTIONMEAN_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSlice.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSlice.h
new file mode 100644
index 000000000..f223a79be
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSlice.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSTRIDEDSLICE_H__
+#define __ARM_COMPUTE_CLSTRIDEDSLICE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLStridedSliceKernel. */
+class CLStridedSlice : public ICLSimpleFunction
+{
+public:
+  /** Set the tensors and masks used by the function.
+   *
+   * @param[in]  input          First tensor input. Data type supported:
+   *                            U8/S8/QS8/QASYMM8/U16/S16/QS16/U32/S32/F16/F32
+   * @param[out] output         Output tensor. Data type supported: Same as @p input
+   * @param[in]  beginData      Tensor with the begin indices (presumably S32 and 1-D, as the
+   *                            kernel's validate() documents - confirm).
+   * @param[in]  endData        Tensor with the end indices (presumably S32 and 1-D - confirm).
+   * @param[in]  stridesData    Tensor with the strides (presumably S32 and 1-D - confirm).
+   * @param[in]  beginMask      Mask for begin
+   * @param[in]  endMask        Mask for end
+   * @param[in]  shrinkAxisMask Mask for shrink axis.
+   */
+  void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
+                 ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
+                 int32_t shrinkAxisMask);
+};
+
+/** Strided-slice function declared alongside @ref CLStridedSlice; it exposes a private
+ * run_on_cpu() helper.
+ *
+ * The class declares no constructor, so every data member carries a default member initializer:
+ * a default-constructed object holds null tensor pointers and zero masks instead of
+ * indeterminate values until configure() is called.
+ */
+class CLStridedSliceCPU : public IFunction
+{
+public:
+  /** Initialise inputs and outputs
+   *
+   * @param[in]  input          First tensor input.
+   * @param[out] output         Output tensor.
+   * @param[in]  beginData      Tensor with the begin indices.
+   * @param[in]  endData        Tensor with the end indices.
+   * @param[in]  stridesData    Tensor with the strides.
+   * @param[in]  beginMask      Mask for begin
+   * @param[in]  endMask        Mask for end
+   * @param[in]  shrinkAxisMask Mask for shrink axis.
+   */
+  void configure(ICLTensor *input, ICLTensor *output, ICLTensor *beginData, ICLTensor *endData,
+                 ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
+                 int32_t shrinkAxisMask);
+
+  // Inherited methods overridden:
+  void run() override;
+
+private:
+  /** CPU-side helper (defined out of line). */
+  void run_on_cpu();
+
+  ICLTensor *_input{nullptr};       /**< Source tensor */
+  ICLTensor *_output{nullptr};      /**< Destination tensor */
+  ICLTensor *_beginData{nullptr};   /**< Begin indices tensor */
+  ICLTensor *_endData{nullptr};     /**< End indices tensor */
+  ICLTensor *_stridesData{nullptr}; /**< Strides tensor */
+  int32_t _beginMask{0};            /**< Begin mask */
+  int32_t _endMask{0};              /**< End mask */
+  int32_t _shrinkAxisMask{0};       /**< Shrink axis mask */
+};
+}
+#endif /*__ARM_COMPUTE_CLSTRIDEDSLICE_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
new file mode 100644
index 000000000..06cd1ee9b
--- /dev/null
+++ b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTOPK_V2_H__
+#define __ARM_COMPUTE_CLTOPK_V2_H__
+
+#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h"
+
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute TopK operation. This function owns the following OpenCL kernels:
+ *
+ * -# @ref CLTopKV2Single
+ * -# @ref CLTopKV2Init
+ * -# @ref CLRadixSortHistogram
+ * -# @ref CLRadixSortScanHistogram
+ * -# @ref CLRadixSortGlobalScanHistogram
+ * -# @ref CLRadixSortPasteHistogram
+ * -# @ref CLRadixSortReorder
+ * -# @ref CLTopKV2FindFirstNegative
+ * -# @ref CLTopKV2ReorderNegatives
+ * -# @ref CLTopKV2Store
+ *
+ * NOTE(review): which of the private run_on_cpu() / run_on_gpu() /
+ * run_on_gpu_single_quicksort() paths run() takes is not visible in this header.
+ */
+class CLTopKV2 : public IFunction
+{
+public:
+  /** Constructor */
+  CLTopKV2();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLTopKV2(const CLTopKV2 &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CLTopKV2 &operator=(const CLTopKV2 &) = delete;
+  /** Allow instances of this class to be moved
+   *
+   * NOTE(review): if the _p_* members point at this object's own cl::Buffer members, a defaulted
+   * move leaves them referring to the moved-from object - confirm against the implementation.
+   */
+  CLTopKV2(CLTopKV2 &&) = default;
+  /** Allow instances of this class to be moved (see the note on the move constructor) */
+  CLTopKV2 &operator=(CLTopKV2 &&) = default;
+  /** Initialise the kernel's inputs and outputs.
+   *
+   * @param[in]  input      Input image. Data types supported: U8/S16/F32.
+   * @param[in]  k          The value of `k`.
+   * @param[out] values     Top k values. Data types supported: S32 if input type is U8/S16, F32 if
+   *                        input type is F32.
+   * @param[out] indices    indices related to top k values. Data types supported: S32 if input type
+   *                        is U8/S16, F32 if input type is F32.
+   *                        NOTE(review): F32 indices look unusual - confirm.
+   * @param[in]  total_bits Defaults to 32. Presumably the key width in bits used by the radix
+   *                        sort - confirm.
+   * @param[in]  bits       Defaults to 4. Presumably the number of bits handled per radix pass -
+   *                        confirm.
+   */
+  void configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices,
+                 int total_bits = 32, int bits = 4);
+
+  // Inherited methods overridden:
+  void run() override;
+
+private:
+  // Execution helpers (defined out of line).
+  void run_on_cpu();
+  void run_on_gpu();
+  void run_on_gpu_single_quicksort();
+
+  uint32_t _k;
+  uint32_t _total_bits;
+  uint32_t _bits;
+  uint32_t _radix;
+  uint32_t _hist_buf_size;
+  uint32_t _glob_sum_buf_size;
+  uint32_t _n;
+
+  // Tensor pointers.
+  ICLTensor *_input;
+  ICLTensor *_values;
+  ICLTensor *_indices;
+
+  // OpenCL buffers held by value.
+  cl::Buffer _qs_idx_buf;
+  cl::Buffer _qs_temp_buf;
+  cl::Buffer _hist_buf;
+  cl::Buffer _glob_sum_buf;
+  cl::Buffer _temp_buf;
+  cl::Buffer _first_negative_idx_buf;
+  cl::Buffer _in_key_buf;
+  cl::Buffer _out_key_buf;
+  cl::Buffer _in_ind_buf;
+  cl::Buffer _out_ind_buf;
+
+  // Raw buffer pointers (presumably aliases of the key/index buffers above - confirm).
+  cl::Buffer *_p_in_key_buf;
+  cl::Buffer *_p_out_key_buf;
+  cl::Buffer *_p_in_ind_buf;
+  cl::Buffer *_p_out_ind_buf;
+
+  // Kernels held by value.
+  CLTopKV2Single _qs_kernel;
+  CLTopKV2Init _init_kernel;
+  CLRadixSortHistogram _hist_kernel;
+  CLRadixSortScanHistogram _scan_hist_kernel;
+  CLRadixSortGlobalScanHistogram _glob_scan_hist_kernel;
+  CLRadixSortPasteHistogram _paste_hist_kernel;
+  CLRadixSortReorder _reorder_kernel;
+  CLTopKV2FindFirstNegative _find_first_negative_kernel;
+  CLTopKV2ReorderNegatives _reorder_negatives_kernel;
+  CLTopKV2Store _store_kernel;
+};
+}
+#endif // __ARM_COMPUTE_CLTOPK_V2_H__