summaryrefslogtreecommitdiff
path: root/runtimes/libs/ARMComputeEx/arm_compute/core/CL
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/libs/ARMComputeEx/arm_compute/core/CL')
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h245
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h101
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h62
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h96
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h58
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h113
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h109
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h129
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h55
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h59
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h104
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h69
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h58
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h653
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h85
15 files changed, 1996 insertions, 0 deletions
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
new file mode 100644
index 000000000..e4e752ef9
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLKernelLibraryEx.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file is a cloned version of CLKernelLibrary.h in ACL. This file defines
+ * an interface for CLKernelLibrary.cpp which adds more OpenCL kernels on top of ACL.
+ */
+
+#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__
+#define __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+
+namespace arm_compute
+{
+
+/**
+ * @brief Singleton that builds and caches the OpenCL kernels added by nnfw on top of ACL
+ */
+class CLKernelLibraryEx
+{
+ using StringSet = std::set<std::string>;
+
+private:
+ /**
+ * @brief Construct a new CLKernelLibraryEx object (private: instances are obtained via get())
+ */
+ CLKernelLibraryEx();
+
+public:
+ /**
+ * @brief Prevent instances of this class from being copy-constructed.
+ */
+ CLKernelLibraryEx(const CLKernelLibraryEx &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copy-assigned.
+ */
+ const CLKernelLibraryEx &operator=(const CLKernelLibraryEx &) = delete;
+
+ /**
+ * @brief Get the KernelLibrary singleton.
+ * @return The KernelLibrary instance
+ */
+ static CLKernelLibraryEx &get();
+
+ /**
+ * @brief Initialise the kernel library.
+ * @param[in] kernel_path Path of the directory from which kernel sources are loaded.
+ * @param[in] context CL context used to create programs.
+ * @param[in] device CL device for which the programs are created.
+ * @return N/A
+ */
+ void init(std::string kernel_path, cl::Context context, cl::Device device)
+ {
+ _kernel_path = std::move(kernel_path);
+ _context = std::move(context);
+ _device = std::move(device);
+ }
+
+ /**
+ * @brief Set the path that the kernels reside in.
+ * @param[in] kernel_path Path of the directory from which kernel sources are loaded.
+ * @return N/A
+ */
+ void set_kernel_path(const std::string &kernel_path) { _kernel_path = kernel_path; }
+
+ /**
+ * @brief Get the path that the kernels reside in.
+ * @return A copy of the path of the kernel files
+ */
+ std::string get_kernel_path() const { return _kernel_path; }
+
+ /**
+ * @brief Get the source of the selected program.
+ * @param[in] program_name Program name.
+ * @return Source of the selected program.
+ */
+ std::string get_program_source(const std::string &program_name);
+
+ /**
+ * @brief Set the CL context used to create programs.
+ * @note Setting the context also resets the device to the first one available in the
+ * new context, or to a default-constructed device if the context is null or has none.
+ * @param[in] context A CL context.
+ * @return N/A
+ */
+ void set_context(cl::Context context)
+ {
+ _context = std::move(context);
+ if (_context.get() == nullptr)
+ {
+ _device = cl::Device();
+ }
+ else
+ {
+ const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
+
+ if (cl_devices.empty())
+ {
+ _device = cl::Device();
+ }
+ else
+ {
+ _device = cl_devices[0];
+ }
+ }
+ }
+
+ /**
+ * @brief Return associated CL context.
+ * @return A mutable reference to the CL context held by the library.
+ */
+ cl::Context &context() { return _context; }
+
+ /**
+ * @brief Set the CL device for which the programs are created.
+ * @param[in] device A CL device.
+ * @return N/A
+ */
+ void set_device(cl::Device device) { _device = std::move(device); }
+
+ /**
+ * @brief Gets the CL device for which the programs are created.
+ * @return A mutable reference to the CL device held by the library.
+ */
+ cl::Device &get_device() { return _device; }
+
+ /**
+ * @brief Return the device version
+ * @return The content of CL_DEVICE_VERSION
+ */
+ std::string get_device_version();
+
+ /**
+ * @brief Create a kernel from the kernel library.
+ * @param[in] kernel_name Kernel name.
+ * @param[in] build_options_set Kernel build options as a set (empty by default).
+ * @return The created kernel.
+ */
+ Kernel create_kernel(const std::string &kernel_name,
+ const StringSet &build_options_set = {}) const;
+
+ /**
+ * @brief Find the maximum number of local work items per workgroup supported for the kernel.
+ * @param[in] kernel kernel object
+ * @return Maximum local workgroup size usable with @p kernel
+ */
+ size_t max_local_workgroup_size(const cl::Kernel &kernel) const;
+
+ /**
+ * @brief Return the default NDRange for the device.
+ * @return default NDRange of the device
+ */
+ cl::NDRange default_ndrange() const;
+
+ /**
+ * @brief Clear the library's caches of loaded and built programs
+ * @return N/A
+ */
+ void clear_programs_cache()
+ {
+ _programs_map.clear();
+ _built_programs_map.clear();
+ }
+
+ /**
+ * @brief Access the cache of built OpenCL programs
+ * @return Read-only map whose keys are built-program names and whose values are the
+ * corresponding built cl::Program objects.
+ */
+ const std::map<std::string, cl::Program> &get_built_programs() const
+ {
+ return _built_programs_map;
+ }
+
+ /**
+ * @brief Add a new built program to the cache
+ * @param[in] built_program_name Name of the program
+ * @param[in] program Built program to add to the cache
+ * @return N/A
+ */
+ void add_built_program(const std::string &built_program_name, cl::Program program);
+
+ /**
+ * @brief Returns true if FP16 is supported by the CL device
+ * @return true if the CL device supports FP16
+ */
+ bool fp16_supported() const;
+
+ /**
+ * @brief Returns true if int64_base_atomics extension is supported by the CL device
+ * @return true if the CL device supports int64_base_atomics extension
+ */
+ bool int64_base_atomics_supported() const;
+
+private:
+ /**
+ * @brief Load program and its dependencies.
+ * @param[in] program_name Name of the program to load.
+ */
+ const Program &load_program(const std::string &program_name) const;
+ /**
+ * @brief Concatenates contents of a set into a single string.
+ * @param[in] s Input set to concatenate.
+ * @return Concatenated string.
+ */
+ std::string stringify_set(const StringSet &s) const;
+
+ cl::Context _context; /**< Underlying CL context. */
+ cl::Device _device; /**< Underlying CL device. */
+ std::string _kernel_path; /**< Path to the kernels folder. */
+ mutable std::map<std::string, const Program>
+ _programs_map; /**< Map with all already loaded program data. */
+ mutable std::map<std::string, cl::Program>
+ _built_programs_map; /**< Map with all already built program data. */
+ static const std::map<std::string, std::string>
+ _kernel_program_map; /**< Map that associates kernel names with programs. */
+ static const std::map<std::string, std::string>
+ _program_source_map; /**< Contains sources for all programs.
+ Used for compile-time kernel inclusion. */
+};
+}
+#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h
new file mode 100644
index 000000000..b98b174f7
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLArgOperationKernel.h
+ * @brief This file defines CLArgOperationKernel
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__
+#define __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define interface for the argop kernel.
+ */
+class CLArgOperationKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Default constructor.
+ */
+ CLArgOperationKernel();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied
+ */
+ CLArgOperationKernel(const CLArgOperationKernel &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied
+ * @return Reference of this instance
+ */
+ CLArgOperationKernel &operator=(const CLArgOperationKernel &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved
+ */
+ CLArgOperationKernel(CLArgOperationKernel &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved
+ * @return Reference of this instance
+ */
+ CLArgOperationKernel &operator=(CLArgOperationKernel &&) = default;
+ /**
+ * @brief Initialise the kernel's input, output, reduction axis and arg operation.
+ * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[out] output The output tensor, Data types supported: S32.
+ * @param[in] axis Axis along which to reduce (a single axis index).
+ * @param[in] op Arg operation to perform.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, ArgOperation op);
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLArgOperationKernel
+ * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] output The output tensor info, Data types supported: S32.
+ * @param[in] axis Axis along which to reduce (a single axis index).
+ * @param[in] op Arg operation to perform.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
+ ArgOperation op);
+
+ /**
+ * @brief Run CLArgOperationKernel op
+ * @param[in] window Window to be used for in_slice
+ * @param[in,out] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ uint32_t _axis;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
new file mode 100644
index 000000000..ab33d9d3a
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
+#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to return truth values of two input tensors for Binary Logical Op */
+class CLBinaryLogicalOpKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLBinaryLogicalOpKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBinaryLogicalOpKernel(const CLBinaryLogicalOpKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBinaryLogicalOpKernel &operator=(const CLBinaryLogicalOpKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBinaryLogicalOpKernel(CLBinaryLogicalOpKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBinaryLogicalOpKernel &operator=(CLBinaryLogicalOpKernel &&) = default;
+ /** Initialize the kernel's inputs, output and the logical operation to apply.
+ * @param[in] input1 Source tensor1.
+ * @param[in] input2 Source tensor2.
+ * @param[out] output Output tensor.
+ * @param[in] op Binary logical operation to perform.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
+ BinaryLogicalOperation op);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+};
+
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
new file mode 100644
index 000000000..4c2feb903
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLCastKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLCastKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__
+#define __ARM_COMPUTE_CLCASTKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define OpenCL kernel for cast operation
+ */
+class CLCastKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Construct CLCastKernel object
+ */
+ CLCastKernel();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLCastKernel(const CLCastKernel &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLCastKernel &operator=(const CLCastKernel &) = delete;
+
+ /**
+ * @brief Construct CLCastKernel object using default move constructor
+ * @param[in] CLCastKernel object to move
+ */
+ CLCastKernel(CLCastKernel &&) = default;
+
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param[in] CLCastKernel object to move
+ */
+ CLCastKernel &operator=(CLCastKernel &&) = default;
+
+ /**
+ * @brief Destruct this CLCastKernel object
+ */
+ ~CLCastKernel() = default;
+
+ /**
+ * @brief Initialise the kernel's input and output.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
new file mode 100644
index 000000000..60ec7a82a
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform depth-to-space operation */
+class CLDepthToSpaceKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDepthToSpaceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default;
+ /** Default destructor */
+ ~CLDepthToSpaceKernel() = default;
+ /** Initialise the kernel's input, output and block size.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size Block size. NOTE(review): valid range is not visible here - confirm.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
new file mode 100644
index 000000000..da075db69
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLEmbeddingLookupKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLEmbeddingLookupKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
+#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform EmbeddingLookup operation with opencl kernel
+ */
+class CLEmbeddingLookupKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Construct a CLEmbeddingLookupKernel object
+ * */
+ CLEmbeddingLookupKernel();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ * */
+ CLEmbeddingLookupKernel(const CLEmbeddingLookupKernel &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ * */
+ CLEmbeddingLookupKernel &operator=(const CLEmbeddingLookupKernel &) = delete;
+
+ /**
+ * @brief Construct a CLEmbeddingLookupKernel object by using default move constructor
+ * @param[in] CLEmbeddingLookupKernel object to move
+ * */
+ CLEmbeddingLookupKernel(CLEmbeddingLookupKernel &&) = default;
+
+ /**
+ * @brief Move assignment operator
+ * @param[in] CLEmbeddingLookupKernel object to move
+ * */
+ CLEmbeddingLookupKernel &operator=(CLEmbeddingLookupKernel &&) = default;
+
+ /**
+ * @brief Destruct this object
+ * */
+ ~CLEmbeddingLookupKernel() = default;
+
+ /**
+ * @brief Set the input and output of the kernel
+ * @param[in] input Source tensor.
+ * Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] lookups Lookups are 1D tensor that values are indices into the first
+ * dimension of input.
+ * Data types supported: S32.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLEmbeddingLookupKernel
+ * @param[in] input The input tensor info.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] output The output tensor info, Data types supported: same as @p input.
+ * @param[in] lookups Lookups info. Data types supported: S32.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *lookups);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ const ICLTensor *_lookups; /**< Lookups tensor */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
new file mode 100644
index 000000000..aa81a1efa
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLGatherExKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLGatherExKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLGATHEREXKERNEL_H__
+#define __ARM_COMPUTE_CLGATHEREXKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define an interface for the gather kernel.
+ */
+class CLGatherExKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Construct CLGatherExKernel object
+ * */
+ CLGatherExKernel();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ */
+ CLGatherExKernel(const CLGatherExKernel &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ */
+ CLGatherExKernel &operator=(const CLGatherExKernel &) = delete;
+
+ /**
+ * @brief Construct CLGatherExKernel object by using default move constructor
+ * @param[in] CLGatherExKernel object to move
+ */
+ CLGatherExKernel(CLGatherExKernel &&) = default;
+
+ /**
+ * @brief Move assignment operator
+ * @param[in] CLGatherExKernel object to move
+ */
+ CLGatherExKernel &operator=(CLGatherExKernel &&) = default;
+
+ /**
+ * @brief Initialise the kernel's input, indices, output and gather axis.
+ * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] indices Indices tensor. Data types supported: S32.
+ * @param[out] output The output tensor, Data types supported: same as @p input.
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative
+ * values wrap around. Defaults to 0
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLGatherExKernel
+ * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] indices Indices tensor info. Data types supported: S32.
+ * @param[in] output The output tensor info, Data types supported: same as @p input.
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative
+ * values wrap around. Defaults to 0
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
+ const ITensorInfo *output, int axis = 0);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_indices;
+ ICLTensor *_output;
+ int _axis;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLGATHEREXKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
new file mode 100644
index 000000000..8269e5a7a
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLHashtableLookupKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLHashtableLookupKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
+#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+* @brief Class to perform HashtableLookup operation with opencl kernel
+*/
+class CLHashtableLookupKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Construct a CLHashtableLookupKernel object
+ * */
+ CLHashtableLookupKernel();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ * */
+ CLHashtableLookupKernel(const CLHashtableLookupKernel &) = delete;
+
+ /**
+ * @brief Prevent copy assignment of this class (As this class contains pointers)
+ * */
+ CLHashtableLookupKernel &operator=(const CLHashtableLookupKernel &) = delete;
+
+ /**
+ * @brief Construct a CLHashtableLookupKernel object by using default move constructor
+ * @param[in] CLHashtableLookupKernel object to move
+ * */
+ CLHashtableLookupKernel(CLHashtableLookupKernel &&) = default;
+
+ /**
+ * @brief Move assignment operator
+ * @param[in] CLHashtableLookupKernel object to move
+ * */
+ CLHashtableLookupKernel &operator=(CLHashtableLookupKernel &&) = default;
+
+ /**
+ * @brief Destruct this object
+ * */
+ ~CLHashtableLookupKernel() = default;
+
+ /**
+ * @brief Set the input and output of the kernel
+ * @param[in] lookups Lookups 1D tensor whose values are indices into the first dimension of
+ * @p input.
+ * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
+ * (True) or not (False). Data types supported: U8/QASYMM8
+ * @return N/A
+ */
+ void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input,
+ ICLTensor *output, ICLTensor *hits);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLHashtableLookupKernel
+ * @param[in] lookups The lookups tensor info. Data types supported: S32.
+ * @param[in] keys The keys tensor info. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input The input tensor info.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] output The output tensor info. Data types and data layouts supported: Same as
+ * @p input.
+ * @param[in] hits The hits tensor info. A boolean tensor that indicates whether the lookup
+ * hits (True) or not (False).
+ * Data types supported: U8/QASYMM8
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys,
+ const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *hits);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_lookups{nullptr}; /**< Lookups tensor */
+ const ICLTensor *_keys{nullptr}; /**< Keys tensor */
+ const ICLTensor *_input{nullptr}; /**< Source tensor */
+ ICLTensor *_output{nullptr}; /**< Destination tensor */
+ ICLTensor *_hits{nullptr}; /**< Hits tensor */
+ std::unique_ptr<CLTensor> _lookup_indices{nullptr}; /**< Lookup indices tensor */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
new file mode 100644
index 000000000..ccbea147e
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__
+#define __ARM_COMPUTE_CLNEGKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a negation operation on tensor*/
+class CLNegKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLNegKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLNegKernel(const CLNegKernel &) = delete;
+ /** Prevent copy assignment of this class (As this class contains pointers). */
+ CLNegKernel &operator=(const CLNegKernel &) = delete;
+ /** Allow instances of this class to be move-constructed */
+ CLNegKernel(CLNegKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLNegKernel &operator=(CLNegKernel &&) = default;
+ /** Initialize the kernel's input and output.
+ *
+ * @param[in] input Source tensor.
+ * @param[out] output Destination tensor.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLNEGKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
new file mode 100644
index 000000000..eff1b8bd5
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__
+#define __ARM_COMPUTE_CLPRELU_KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to calculate PReLU*/
+class CLPReLUKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPReLUKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLPReLUKernel(const CLPReLUKernel &) = delete;
+ /** Prevent copy assignment of this class (As this class contains pointers). */
+ CLPReLUKernel &operator=(const CLPReLUKernel &) = delete;
+ /** Allow instances of this class to be move-constructed */
+ CLPReLUKernel(CLPReLUKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLPReLUKernel &operator=(CLPReLUKernel &&) = default;
+ /** Initialize the kernel's inputs and output.
+ *
+ * @param[in] input First source tensor.
+ * @param[in] alpha Second source tensor (alpha).
+ * @param[out] output Output tensor.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< First source tensor */
+ const ICLTensor *_alpha; /**< Second source tensor (alpha) */
+ ICLTensor *_output; /**< Output tensor */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
new file mode 100644
index 000000000..a26a4a7fc
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLReduceOperationKernel.h
+ * @brief This file defines CLReduceOperationKernel class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
+#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define interface for the reduce operation kernel
+ */
+class CLReduceOperationKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Default constructor
+ */
+ CLReduceOperationKernel();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLReduceOperationKernel(const CLReduceOperationKernel &) = delete;
+ /**
+ * @brief Prevent copy assignment of this class (As this class contains pointers)
+ */
+ CLReduceOperationKernel &operator=(const CLReduceOperationKernel &) = delete;
+ /**
+ * @brief Allow instances of this class to be move-constructed
+ */
+ CLReduceOperationKernel(CLReduceOperationKernel &&) = default;
+ /**
+ * @brief Allow instances of this class to be move-assigned
+ */
+ CLReduceOperationKernel &operator=(CLReduceOperationKernel &&) = default;
+ /**
+ * @brief Default destructor
+ */
+ ~CLReduceOperationKernel() = default;
+
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] input Source tensor. Data types supported: U8/S32/F32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce.
+ * @param[in] op Reduce operation to perform.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis,
+ ReduceOperation op);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLReduceOperationKernel.
+ * @param[in] input Source tensor info. Data types supported: U8/S32/F32.
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce.
+ * @param[in] op Reduce operation to perform.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
+ ReduceOperation op);
+
+ /**
+ * @brief Run CLReduceOperationKernel op
+ * @param[in] window Window to be used for in_slice
+ * @param[in,out] queue Command queue (cl::CommandQueue) passed to this kernel
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ uint32_t _axis; /**< Axis along which to reduce */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
new file mode 100644
index 000000000..577e38cc4
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
+#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform SPACE_TO_BATCH_ND operation */
+class CLSpaceToBatchNDKernel final : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLSpaceToBatchNDKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToBatchNDKernel(const CLSpaceToBatchNDKernel &) = delete;
+ /** Prevent copy assignment of this class (As this class contains pointers) */
+ CLSpaceToBatchNDKernel &operator=(const CLSpaceToBatchNDKernel &) = delete;
+ /** Allow instances of this class to be move-constructed */
+ CLSpaceToBatchNDKernel(CLSpaceToBatchNDKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLSpaceToBatchNDKernel &operator=(CLSpaceToBatchNDKernel &&) = default;
+ /** Default destructor */
+ ~CLSpaceToBatchNDKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @note The data layout of input and output must be the same.
+ * @note The number of dimensions of input and output must be 4, and `spatial` dimensions
+ * are height and width.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
+ * Data layout supported: NCHW/NHWC
+ * @param[in] block_size Block size tensor. Data types supported: S32.
+ * @param[in] padding_size Padding size tensor. Data types supported: S32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
+ * Data layout supported: NCHW/NHWC
+ */
+ void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size,
+ ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input{nullptr}; /**< Source tensor */
+ const ICLTensor *_block_size{nullptr}; /**< Block size tensor */
+ const ICLTensor *_padding_size{nullptr}; /**< Padding size tensor */
+ ICLTensor *_output{nullptr}; /**< Destination tensor */
+};
+
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
new file mode 100644
index 000000000..be845a549
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
+#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform spaceTodepth operation */
+class CLSpaceToDepthKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLSpaceToDepthKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete;
+ /** Prevent copy assignment of this class (As this class contains pointers) */
+ CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete;
+ /** Allow instances of this class to be move-constructed */
+ CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default;
+ /** Allow instances of this class to be move-assigned */
+ CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default;
+ /** Default destructor */
+ ~CLSpaceToDepthKernel() = default;
+ /** Initialise the kernel's input, output and block size.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size Block size (valid range not visible here - TODO confirm in the .cpp).
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
new file mode 100644
index 000000000..eb2bad254
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
@@ -0,0 +1,653 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLTopKV2Kernel.h
+ * @brief This file defines classes for TopKV2Kernel
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__
+#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+// these parameters can be changed
+#define _ITEMS 16 // number of items in a group
+#define _GROUPS 4 // the number of virtual processors is _ITEMS * _GROUPS
+#define _HISTOSPLIT (_ITEMS * _GROUPS / 2) // number of splits of the histogram
+#define PERMUT // store the final permutation
+////////////////////////////////////////////////////////
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define CLTopKV2Single
+ */
+class CLTopKV2Single : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2Single();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied
+ */
+ CLTopKV2Single(const CLTopKV2Single &) = delete;
+ /**
+ * @brief Prevent copy assignment of this class (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2Single &operator=(const CLTopKV2Single &) = delete;
+ /**
+ * @brief Allow instances of this class to be move-constructed
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved
+ */
+ CLTopKV2Single(CLTopKV2Single &&) = default;
+ /**
+ * @brief Allow instances of this class to be move-assigned
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2Single &operator=(CLTopKV2Single &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[in] input An input tensor
+ * @param[in] topk_values Values of the top k predictions
+ * @param[in] topk_indices Indices of the top k predictions
+ * @param[in] indices Indices
+ * @param[in] temp_stack Temp stack
+ * @param[in] k K of the top k predictions
+ * @param[in] n Number of times to quick-sort
+ * @return N/A
+ */
+ void configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices,
+ cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n);
+
+ /**
+ * @brief Run CLTopKV2Single op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_topk_values; /**< Values of the top k predictions */
+ ICLTensor *_topk_indices; /**< Indices of the top k predictions */
+};
+
+/**
+ * @brief Class to define CLTopKV2Init
+ */
+class CLTopKV2Init : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2Init();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied
+ */
+ CLTopKV2Init(const CLTopKV2Init &) = delete;
+ /**
+ * @brief Prevent copy assignment of this class (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2Init &operator=(const CLTopKV2Init &) = delete;
+ /**
+ * @brief Allow instances of this class to be move-constructed
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved
+ */
+ CLTopKV2Init(CLTopKV2Init &&) = default;
+ /**
+ * @brief Allow instances of this class to be move-assigned
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2Init &operator=(CLTopKV2Init &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[in] input An input tensor
+ * @param[in] in_key_buf Buffer of input key
+ * @param[in] in_ind_buf Buffer of input index
+ * @param[in] n Number of times to quick-sort
+ * @return N/A
+ */
+ void configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, int n);
+
+ /**
+ * @brief Run CLTopKV2Init op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input; /**< Input tensor */
+};
+
+/**
+ * @brief Class to define CLRadixSortHistogram
+ */
+class CLRadixSortHistogram : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortHistogram();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied
+ */
+ CLRadixSortHistogram(const CLRadixSortHistogram &) = delete;
+ /**
+ * @brief Prevent copy assignment of this class (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortHistogram &operator=(const CLRadixSortHistogram &) = delete;
+ /**
+ * @brief Allow instances of this class to be move-constructed
+ * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved
+ */
+ CLRadixSortHistogram(CLRadixSortHistogram &&) = default;
+ /**
+ * @brief Allow instances of this class to be move-assigned
+ * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortHistogram &operator=(CLRadixSortHistogram &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] hist_buf Buffer of histogram
+ * @param[in] bits Number of bits to be used for radix sort
+ * @param[in] n Integer number size to sort
+ * @return N/A
+ */
+ void configure(cl::Buffer *hist_buf, int bits, int n);
+
+ /**
+ * @brief Set pass
+ * @param[in] pass Radix sort pass; stored in this kernel
+ * @param[in] in_key_buf Buffer of input key; only the pointer is stored, not a copy
+ * @return N/A
+ */
+ void setPass(int pass, cl::Buffer *in_key_buf)
+ {
+ _pass = pass;
+ _in_key_buf = in_key_buf;
+ }
+
+ /**
+ * @brief Run CLRadixSortHistogram op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ int _pass; /**< Radix sort pass set by setPass() */
+ cl::Buffer *_in_key_buf; /**< Input key buffer set by setPass() */
+};
+
+/**
+ * @brief Class to define CLRadixSortScanHistogram
+ */
+class CLRadixSortScanHistogram : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortScanHistogram();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied
+ */
+ CLRadixSortScanHistogram(const CLRadixSortScanHistogram &) = delete;
+ /**
+ * @brief Prevent copy assignment of this class (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortScanHistogram &operator=(const CLRadixSortScanHistogram &) = delete;
+ /**
+ * @brief Allow instances of this class to be move-constructed
+ * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved
+ */
+ CLRadixSortScanHistogram(CLRadixSortScanHistogram &&) = default;
+ /**
+ * @brief Allow instances of this class to be move-assigned
+ * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortScanHistogram &operator=(CLRadixSortScanHistogram &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] hist_buf Buffer of histogram
+ * @param[out] glob_sum_buf Buffer of global sum
+ * @param[in] bits Number of bits to be used for radix sort
+ * @return N/A
+ */
+ void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);
+
+ /**
+ * @brief Run CLRadixSortScanHistogram op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/**
+ * @brief Class to define CLRadixSortGlobalScanHistogram
+ */
+class CLRadixSortGlobalScanHistogram : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortGlobalScanHistogram();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied
+ */
+ CLRadixSortGlobalScanHistogram(const CLRadixSortGlobalScanHistogram &) = delete;
+ /**
+ * @brief Prevent copy assignment of this class (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortGlobalScanHistogram &operator=(const CLRadixSortGlobalScanHistogram &) = delete;
+ /**
+ * @brief Allow instances of this class to be move-constructed
+ * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved
+ */
+ CLRadixSortGlobalScanHistogram(CLRadixSortGlobalScanHistogram &&) = default;
+ /**
+ * @brief Allow instances of this class to be move-assigned
+ * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortGlobalScanHistogram &operator=(CLRadixSortGlobalScanHistogram &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] glob_sum_buf Buffer of global sum
+ * @param[out] temp_buf Temp buffer to be used while RadixSortGlobalScanHistogram
+ * @param[in] bits Number of bits to be used for radix sort
+ * @return N/A
+ */
+ void configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, int bits);
+
+ /**
+ * @brief Run CLRadixSortGlobalScanHistogram op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/**
+ * @brief Class to define CLRadixSortPasteHistogram
+ */
+class CLRadixSortPasteHistogram : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortPasteHistogram();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied
+ */
+ CLRadixSortPasteHistogram(const CLRadixSortPasteHistogram &) = delete;
+ /**
+ * @brief Prevent copy assignment of this class (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortPasteHistogram &operator=(const CLRadixSortPasteHistogram &) = delete;
+ /**
+ * @brief Allow instances of this class to be move-constructed
+ * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved
+ */
+ CLRadixSortPasteHistogram(CLRadixSortPasteHistogram &&) = default;
+ /**
+ * @brief Allow instances of this class to be move-assigned
+ * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortPasteHistogram &operator=(CLRadixSortPasteHistogram &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] hist_buf Buffer of histogram
+ * @param[out] glob_sum_buf Buffer of global sum
+ * @param[in] bits Number of bits to be used for radix sort
+ * @return N/A
+ */
+ void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);
+
+ /**
+ * @brief Run CLRadixSortPasteHistogram op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/**
+ * @brief Class to define the CLRadixSortReorder kernel (an ICLKernel subclass)
+ */
+class CLRadixSortReorder : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortReorder();
+ /**
+ * @brief Copy constructor is deleted (as this class contains pointers).
+ * @param[in] copiedInstance Const reference of CLRadixSortReorder to be copied
+ */
+ CLRadixSortReorder(const CLRadixSortReorder &) = delete;
+ /**
+ * @brief Copy assignment is deleted (as this class contains pointers).
+ * @param[in] copiedInstance Const reference of CLRadixSortReorder to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortReorder &operator=(const CLRadixSortReorder &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved (defaulted move constructor)
+ * @param[in] movedInstance Rvalue reference of CLRadixSortReorder to be moved
+ */
+ CLRadixSortReorder(CLRadixSortReorder &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved (defaulted move assignment)
+ * @param[in] movedInstance Rvalue reference of CLRadixSortReorder to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortReorder &operator=(CLRadixSortReorder &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] hist_buf Buffer of histogram
+ * @param[in] bits Number of bits to be used for radix sort
+ * @param[in] n Integer number size to sort
+ * @return N/A
+ */
+ void configure(cl::Buffer *hist_buf, int bits, int n);
+
+ /**
+ * @brief Set pass and key/index buffers; only stores the given arguments in the members
+ * @param[in] pass Radix sort pass value, stored in _pass
+ * @param[in] in_key_buf Buffer of input key
+ * @param[out] out_key_buf Buffer of output key
+ * @param[in] in_ind_buf Buffer of input index
+ * @param[out] out_ind_buf Buffer of output index
+ * @return N/A
+ */
+ void setPass(int pass, cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
+ cl::Buffer *out_ind_buf)
+ {
+ _pass = pass;
+ _in_key_buf = in_key_buf;
+ _out_key_buf = out_key_buf;
+ _in_ind_buf = in_ind_buf;
+ _out_ind_buf = out_ind_buf;
+ }
+ /**
+ * @brief Run CLRadixSortReorder op (overrides the run() of the base class)
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ int _pass; // assigned in setPass()
+ cl::Buffer *_in_key_buf; // assigned in setPass()
+ cl::Buffer *_out_key_buf; // assigned in setPass()
+ cl::Buffer *_in_ind_buf; // assigned in setPass()
+ cl::Buffer *_out_ind_buf; // assigned in setPass()
+};
+
+/**
+ * @brief Class to define the CLTopKV2FindFirstNegative kernel (an ICLKernel subclass)
+ */
+class CLTopKV2FindFirstNegative : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2FindFirstNegative();
+ /**
+ * @brief Copy constructor is deleted (as this class contains pointers).
+ * @param[in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied
+ */
+ CLTopKV2FindFirstNegative(const CLTopKV2FindFirstNegative &) = delete;
+ /**
+ * @brief Copy assignment is deleted (as this class contains pointers).
+ * @param[in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2FindFirstNegative &operator=(const CLTopKV2FindFirstNegative &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved (defaulted move constructor)
+ * @param[in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved
+ */
+ CLTopKV2FindFirstNegative(CLTopKV2FindFirstNegative &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved (defaulted move assignment)
+ * @param[in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2FindFirstNegative &operator=(CLTopKV2FindFirstNegative &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] first_negative_idx_buf Buffer of the first negative index
+ * @param[in] n Number of times to find (NOTE(review): possibly the element count - confirm)
+ * @return N/A
+ */
+ void configure(cl::Buffer *first_negative_idx_buf, int n);
+
+ /**
+ * @brief Set output buffer; only stores the given pointer in _out_key_buf
+ * @param[out] out_key_buf Buffer of output key
+ * @return N/A
+ */
+ void setOutputBuffer(cl::Buffer *out_key_buf) { _out_key_buf = out_key_buf; }
+
+ /**
+ * @brief Run CLTopKV2FindFirstNegative op (overrides the run() of the base class)
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ cl::Buffer *_out_key_buf; // assigned in setOutputBuffer()
+};
+
+/**
+ * @brief Class to define the CLTopKV2ReorderNegatives kernel (an ICLKernel subclass)
+ */
+class CLTopKV2ReorderNegatives : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2ReorderNegatives();
+ /**
+ * @brief Copy constructor is deleted (as this class contains pointers).
+ * @param[in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied
+ */
+ CLTopKV2ReorderNegatives(const CLTopKV2ReorderNegatives &) = delete;
+ /**
+ * @brief Copy assignment is deleted (as this class contains pointers).
+ * @param[in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2ReorderNegatives &operator=(const CLTopKV2ReorderNegatives &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved (defaulted move constructor)
+ * @param[in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved
+ */
+ CLTopKV2ReorderNegatives(CLTopKV2ReorderNegatives &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved (defaulted move assignment)
+ * @param[in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2ReorderNegatives &operator=(CLTopKV2ReorderNegatives &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] first_negative_idx_buf Buffer of the first negative index
+ * @param[in] n Number of times to find (NOTE(review): possibly the element count - confirm)
+ * @return N/A
+ */
+ void configure(cl::Buffer *first_negative_idx_buf, int n);
+
+ /**
+ * @brief Set buffers; only stores the given pointers in the members
+ * @param[in] in_key_buf Buffer of input key
+ * @param[out] out_key_buf Buffer of output key
+ * @param[in] in_ind_buf Buffer of input index
+ * @param[out] out_ind_buf Buffer of output index
+ * @return N/A
+ */
+ void setBuffers(cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
+ cl::Buffer *out_ind_buf)
+ {
+ _in_key_buf = in_key_buf;
+ _out_key_buf = out_key_buf;
+ _in_ind_buf = in_ind_buf;
+ _out_ind_buf = out_ind_buf;
+ }
+
+ /**
+ * @brief Run CLTopKV2ReorderNegatives op (overrides the run() of the base class)
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ cl::Buffer *_in_key_buf; // assigned in setBuffers()
+ cl::Buffer *_out_key_buf; // assigned in setBuffers()
+ cl::Buffer *_in_ind_buf; // assigned in setBuffers()
+ cl::Buffer *_out_ind_buf; // assigned in setBuffers()
+};
+
+/**
+ * @brief Class to define the CLTopKV2Store kernel (an ICLKernel subclass)
+ */
+class CLTopKV2Store : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2Store();
+ /**
+ * @brief Copy constructor is deleted (as this class contains pointers).
+ * @param[in] copiedInstance Const reference of CLTopKV2Store to be copied
+ */
+ CLTopKV2Store(const CLTopKV2Store &) = delete;
+ /**
+ * @brief Copy assignment is deleted (as this class contains pointers).
+ * @param[in] copiedInstance Const reference of CLTopKV2Store to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2Store &operator=(const CLTopKV2Store &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved (defaulted move constructor)
+ * @param[in] movedInstance Rvalue reference of CLTopKV2Store to be moved
+ */
+ CLTopKV2Store(CLTopKV2Store &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved (defaulted move assignment)
+ * @param[in] movedInstance Rvalue reference of CLTopKV2Store to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2Store &operator=(CLTopKV2Store &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] values Values tensor to store
+ * @param[out] indices Indices tensor to be used for store
+ * @param[in] k K of the top k predictions
+ * @param[in] n Number of times to store (NOTE(review): possibly the element count - confirm)
+ * @return N/A
+ */
+ void configure(ICLTensor *values, ICLTensor *indices, int k, int n);
+
+ /**
+ * @brief Set output buffers (declared here; the definition is not in this header)
+ * @param[out] out_key_buf Buffer of output key
+ * @param[out] out_ind_buf Buffer of output index
+ * @return N/A
+ */
+ void setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf);
+
+ /**
+ * @brief Run CLTopKV2Store op (overrides the run() of the base class)
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_values;
+ ICLTensor *_indices;
+ cl::Buffer *_out_key_buf;
+ cl::Buffer *_out_ind_buf;
+};
+
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_CLTOPKV2KERNEL_H__
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h
new file mode 100644
index 000000000..c5ef730b6
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL.
+ */
+class CLTransposeConvLayerUpsampleKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLTransposeConvLayerUpsampleKernel();
+ /** Copy constructor is deleted (as this class contains pointers) */
+ CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete;
+ /** Copy assignment operator is deleted (as this class contains pointers) */
+ CLTransposeConvLayerUpsampleKernel &
+ operator=(const CLTransposeConvLayerUpsampleKernel &) = delete;
+ /** Default Move Constructor. */
+ CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default;
+ /** Default move assignment operator */
+ CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default;
+ /** Default destructor */
+ ~CLTransposeConvLayerUpsampleKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input. All but
+ * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only
+ * performed within the XY-plane.
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be
+ * filled with zero.
+ * @param[in] info Contains padding and stride information described in @ref
+ * PadStrideInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+ const PadStrideInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLTransposeConvLayerUpsampleKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: same as @p input. All
+ * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is
+ * only performed within the XY-plane.
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled
+ * with zero.
+ * @param[in] info Contains padding and stride information described in @ref
+ * PadStrideInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const BorderSize &inner_border, const PadStrideInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ BorderSize _inner_border;
+ PadStrideInfo _info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */