Diffstat (limited to 'compute/ARMComputeEx/arm_compute/core')
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h  245
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h  101
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h  62
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h  98
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h  58
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h  113
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h  109
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h  129
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h  100
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h  55
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h  59
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h  104
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h  69
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h  58
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h  657
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h  85
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h  72
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h  69
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h  70
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h  80
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h  80
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h  102
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h  79
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h  118
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h  96
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h  115
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h  83
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h  84
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h  82
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h  92
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h  81
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/TypesEx.h  64
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/UtilsEx.h  47
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h  222
34 files changed, 3738 insertions, 0 deletions
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
new file mode 100644
index 000000000..e4e752ef9
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLKernelLibraryEx.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file is a cloned version of CLKernelLibrary.h in ACL. This file defines
+ * an interface for CLKernelLibrary.cpp which adds more OpenCL kernels on top of ACL.
+ */
+
+#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__
+#define __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+
+namespace arm_compute
+{
+
+/**
+ * @brief Class to build OpenCL kernels added from nnfw
+ * */
+class CLKernelLibraryEx
+{
+ using StringSet = std::set<std::string>;
+
+private:
+ /**
+ * @brief Construct a new CLKernelLibraryEx object
+ */
+ CLKernelLibraryEx();
+
+public:
+ /**
+ * @brief Prevent instances of this class from being copied.
+ */
+ CLKernelLibraryEx(const CLKernelLibraryEx &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied.
+ */
+ const CLKernelLibraryEx &operator=(const CLKernelLibraryEx &) = delete;
+
+ /**
+ * @brief Get the KernelLibrary singleton.
+ * @return The KernelLibrary instance
+ */
+ static CLKernelLibraryEx &get();
+
+ /**
+ * @brief Initialise the kernel library.
+ * @param[in] kernel_path Path of the directory from which kernel sources are loaded.
+ * @param[in] context CL context used to create programs.
+ * @param[in] device CL device for which the programs are created.
+ * @return N/A
+ */
+ void init(std::string kernel_path, cl::Context context, cl::Device device)
+ {
+ _kernel_path = std::move(kernel_path);
+ _context = std::move(context);
+ _device = std::move(device);
+ }
+
+ /**
+ * @brief Set the path that the kernels reside in.
+ * @param[in] kernel_path Path of the directory from which kernel sources are loaded.
+ * @return N/A
+ */
+ void set_kernel_path(const std::string &kernel_path) { _kernel_path = kernel_path; };
+
+ /**
+ * @brief Get the path that the kernels reside in.
+ * @return the path of kernel files
+ */
+ std::string get_kernel_path() { return _kernel_path; };
+
+ /**
+ * @brief Get the source of the selected program.
+ * @param[in] program_name Program name.
+ * @return Source of the selected program.
+ */
+ std::string get_program_source(const std::string &program_name);
+
+ /**
+ * @brief Set the CL context used to create programs.
+ * @note Setting the context also resets the device to the
+ * first one available in the new context.
+ * @param[in] context A CL context.
+ * @return N/A
+ */
+ void set_context(cl::Context context)
+ {
+ _context = std::move(context);
+ if (_context.get() == nullptr)
+ {
+ _device = cl::Device();
+ }
+ else
+ {
+ const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
+
+ if (cl_devices.empty())
+ {
+ _device = cl::Device();
+ }
+ else
+ {
+ _device = cl_devices[0];
+ }
+ }
+ }
+
+ /**
+ * @brief Return associated CL context.
+ * @return A CL context.
+ */
+ cl::Context &context() { return _context; }
+
+ /**
+ * @brief Set the CL device for which the programs are created.
+ * @param[in] device A CL device.
+ * @return N/A
+ */
+ void set_device(cl::Device device) { _device = std::move(device); }
+
+ /**
+ * @brief Gets the CL device for which the programs are created.
+ * @return A CL device.
+ */
+ cl::Device &get_device() { return _device; }
+
+ /**
+ * @brief Return the device version
+ * @return The content of CL_DEVICE_VERSION
+ */
+ std::string get_device_version();
+
+ /**
+ * @brief Create a kernel from the kernel library.
+ * @param[in] kernel_name Kernel name.
+ * @param[in] build_options_set Kernel build options as a set.
+ * @return The created kernel.
+ */
+ Kernel create_kernel(const std::string &kernel_name,
+ const StringSet &build_options_set = {}) const;
+
+ /**
+ * @brief Find the maximum number of local work items in a workgroup that can be supported for
+ * the kernel.
+ * @param[in] kernel kernel object
+ * @return Maximum number of local work items supported for the kernel
+ */
+ size_t max_local_workgroup_size(const cl::Kernel &kernel) const;
+ /**
+ * @brief Return the default NDRange for the device.
+ * @return Default NDRange of the device
+ */
+ cl::NDRange default_ndrange() const;
+
+ /**
+ * @brief Clear the library's cache of binary programs
+ * @return N/A
+ */
+ void clear_programs_cache()
+ {
+ _programs_map.clear();
+ _built_programs_map.clear();
+ }
+
+ /**
+ * @brief Access the cache of built OpenCL programs
+ * @return Map of built programs, keyed by program name, whose values are the built
+ * cl::Program objects
+ */
+ const std::map<std::string, cl::Program> &get_built_programs() const
+ {
+ return _built_programs_map;
+ }
+
+ /**
+ * @brief Add a new built program to the cache
+ * @param[in] built_program_name Name of the program
+ * @param[in] program Built program to add to the cache
+ * @return N/A
+ */
+ void add_built_program(const std::string &built_program_name, cl::Program program);
+
+ /**
+ * @brief Returns true if FP16 is supported by the CL device
+ * @return true if the CL device supports FP16
+ */
+ bool fp16_supported() const;
+
+ /**
+ * @brief Returns true if int64_base_atomics extension is supported by the CL device
+ * @return true if the CL device supports int64_base_atomics extension
+ */
+ bool int64_base_atomics_supported() const;
+
+private:
+ /**
+ * @brief Load program and its dependencies.
+ * @param[in] program_name Name of the program to load.
+ */
+ const Program &load_program(const std::string &program_name) const;
+ /**
+ * @brief Concatenates contents of a set into a single string.
+ * @param[in] s Input set to concatenate.
+ * @return Concatenated string.
+ */
+ std::string stringify_set(const StringSet &s) const;
+
+ cl::Context _context; /**< Underlying CL context. */
+ cl::Device _device; /**< Underlying CL device. */
+ std::string _kernel_path; /**< Path to the kernels folder. */
+ mutable std::map<std::string, const Program>
+ _programs_map; /**< Map with all already loaded program data. */
+ mutable std::map<std::string, cl::Program>
+ _built_programs_map; /**< Map with all already built program data. */
+ static const std::map<std::string, std::string>
+ _kernel_program_map; /**< Map that associates kernel names with programs. */
+ static const std::map<std::string, std::string>
+ _program_source_map; /**< Contains sources for all programs.
+ Used for compile-time kernel inclusion. */
+};
+}
+#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
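Usage sketch (illustrative, not part of the patch): CLKernelLibraryEx is a singleton mirroring ACL's CLKernelLibrary, so client code initialises it once with a kernel path, context and device, then builds extension kernels by name. The kernel path and kernel name below are placeholder assumptions; the calls themselves follow the declarations above plus standard ACL/OpenCL APIs.

    // Sketch only: "./cl_kernels/" and "example_kernel" are placeholders, not values from this patch.
    #include "arm_compute/core/CL/CLKernelLibraryEx.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"

    void init_extension_kernel_library()
    {
      using namespace arm_compute;
      CLScheduler::get().default_init();                      // create the default CL context/queue
      cl::Context ctx = CLScheduler::get().context();
      cl::Device dev  = ctx.getInfo<CL_CONTEXT_DEVICES>()[0]; // first device of the context
      CLKernelLibraryEx::get().init("./cl_kernels/", ctx, dev);
      auto kernel = CLKernelLibraryEx::get().create_kernel("example_kernel"); // placeholder name
      (void)kernel; // normally wrapped by one of the ICLKernel subclasses below
    }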
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h
new file mode 100644
index 000000000..b98b174f7
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLArgOperationKernel.h
+ * @brief This file defines CLArgOperationKernel
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__
+#define __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define interface for the argop kernel.
+ */
+class CLArgOperationKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Default constructor.
+ */
+ CLArgOperationKernel();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied
+ */
+ CLArgOperationKernel(const CLArgOperationKernel &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied
+ * @return Reference of this instance
+ */
+ CLArgOperationKernel &operator=(const CLArgOperationKernel &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved
+ */
+ CLArgOperationKernel(CLArgOperationKernel &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved
+ * @return Reference of this instance
+ */
+ CLArgOperationKernel &operator=(CLArgOperationKernel &&) = default;
+ /**
+ * @brief Initialise the kernel's input, output and border mode.
+ * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[out] output The output tensor. Data types supported: S32.
+ * @param[in] axis Axis along which to reduce.
+ * @param[in] op Arg operation to perform.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, ArgOperation op);
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLArgOperationKernel
+ * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] output The output tensor info. Data types supported: S32.
+ * @param[in] axis Axis along which to reduce.
+ * @param[in] op Arg operation to perform.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
+ ArgOperation op);
+
+ /**
+ * @brief Run CLArgOperationKernel op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ uint32_t _axis;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ */
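Usage sketch (illustrative, not part of the patch) of the validate/configure/enqueue pattern for this kernel; ArgOperation::MAX is assumed to be an enumerator in TypesEx.h, and the tensors are assumed to be already initialised CLTensor objects.

    // Sketch: argmax along axis 0. ArgOperation::MAX is an assumed TypesEx.h enumerator.
    #include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_arg_max(arm_compute::CLTensor &input, arm_compute::CLTensor &output)
    {
      using namespace arm_compute;
      ARM_COMPUTE_ERROR_THROW_ON(
          CLArgOperationKernel::validate(input.info(), output.info(), 0, ArgOperation::MAX));
      CLArgOperationKernel kernel;
      kernel.configure(&input, &output, 0, ArgOperation::MAX); // reduce along axis 0
      CLScheduler::get().enqueue(kernel);                      // run over the kernel's full window
    }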
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
new file mode 100644
index 000000000..ab33d9d3a
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
+#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to compute element-wise truth values of two input tensors for a binary logical operation */
+class CLBinaryLogicalOpKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLBinaryLogicalOpKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBinaryLogicalOpKernel(const CLBinaryLogicalOpKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBinaryLogicalOpKernel &operator=(const CLBinaryLogicalOpKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBinaryLogicalOpKernel(CLBinaryLogicalOpKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBinaryLogicalOpKernel &operator=(CLBinaryLogicalOpKernel &&) = default;
+ /** Initialize the kernel's inputs and output.
+ *
+ * @param[in] input1 First source tensor.
+ * @param[in] input2 Second source tensor.
+ * @param[out] output Output tensor.
+ * @param[in] op Binary logical operation to perform.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
+ BinaryLogicalOperation op);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+};
+
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ */
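Usage sketch (illustrative, not part of the patch); BinaryLogicalOperation::AND is assumed to be an enumerator in TypesEx.h, and the two inputs and the output are assumed to be initialised elsewhere.

    // Sketch: element-wise logical AND of two tensors.
    #include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_logical_and(arm_compute::CLTensor &a, arm_compute::CLTensor &b,
                         arm_compute::CLTensor &out)
    {
      arm_compute::CLBinaryLogicalOpKernel kernel;
      kernel.configure(&a, &b, &out, arm_compute::BinaryLogicalOperation::AND); // assumed enumerator
      arm_compute::CLScheduler::get().enqueue(kernel);
    }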
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
new file mode 100644
index 000000000..16cef0b61
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLCastKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLCastKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__
+#define __ARM_COMPUTE_CLCASTKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define OpenCL kernel for cast operation
+ */
+class CLCastKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Construct CLCastKernel object
+ */
+ CLCastKernel();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLCastKernel(const CLCastKernel &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLCastKernel &operator=(const CLCastKernel &) = delete;
+
+ /**
+ * @brief Construct CLCastKernel object using default move constructor
+ * @param[in] CLCastKernel object to move
+ */
+ CLCastKernel(CLCastKernel &&) = default;
+
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param[in] CLCastKernel object to move
+ */
+ CLCastKernel &operator=(CLCastKernel &&) = default;
+
+ /**
+ * @brief Destruct this CLCastKernel object
+ */
+ ~CLCastKernel() = default;
+
+ /**
+ * @brief Initialise the kernel's input and output.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] input_subtype Sub data type of input.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, SubDataType input_subtype);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */
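Usage sketch (illustrative, not part of the patch): the usual ACL pattern of configuring the kernel on tensor metadata before allocating buffers. SubDataType::NONE is assumed to be a TypesEx.h enumerator meaning no sub-type override; the shapes are arbitrary.

    // Sketch: cast a QASYMM8 tensor to F32.
    #include "arm_compute/core/CL/kernels/CLCastKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_cast()
    {
      using namespace arm_compute;
      CLTensor in, out;
      in.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8));
      out.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
      CLCastKernel kernel;
      kernel.configure(&in, &out, SubDataType::NONE); // assumed enumerator
      in.allocator()->allocate();
      out.allocator()->allocate();
      CLScheduler::get().enqueue(kernel);
    }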
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
new file mode 100644
index 000000000..60ec7a82a
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a DepthToSpace operation */
+class CLDepthToSpaceKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDepthToSpaceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default;
+ /** Default destructor */
+ ~CLDepthToSpaceKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size Block size used to rearrange depth values into spatial blocks.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */
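Usage sketch (illustrative, not part of the patch); the input and output tensors are assumed to be initialised with shapes consistent with the chosen block size.

    // Sketch: depth-to-space rearrangement with block_size = 2.
    #include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_depth_to_space(arm_compute::CLTensor &input, arm_compute::CLTensor &output)
    {
      arm_compute::CLDepthToSpaceKernel kernel;
      kernel.configure(&input, &output, /*block_size=*/2);
      arm_compute::CLScheduler::get().enqueue(kernel);
    }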
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
new file mode 100644
index 000000000..da075db69
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLEmbeddingLookupKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLEmbeddingLookupKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
+#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+* @brief Class to perform EmbeddingLookup operation with an OpenCL kernel
+*/
+class CLEmbeddingLookupKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Construct a CLEmbeddingLookupKernel object
+ * */
+ CLEmbeddingLookupKernel();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ * */
+ CLEmbeddingLookupKernel(const CLEmbeddingLookupKernel &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ * */
+ CLEmbeddingLookupKernel &operator=(const CLEmbeddingLookupKernel &) = delete;
+
+ /**
+ * @brief Construct a CLEmbeddingLookupKernel object by using default move constructor
+ * @param[in] CLEmbeddingLookupKernel object to move
+ * */
+ CLEmbeddingLookupKernel(CLEmbeddingLookupKernel &&) = default;
+
+ /**
+ * @brief Move assignment operator
+ * @param[in] CLEmbeddingLookupKernel object to move
+ * */
+ CLEmbeddingLookupKernel &operator=(CLEmbeddingLookupKernel &&) = default;
+
+ /**
+ * @brief Destruct this object
+ * */
+ ~CLEmbeddingLookupKernel() = default;
+
+ /**
+ * @brief Set the input and output of the kernel
+ * @param[in] input Source tensor.
+ * Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] lookups Lookups 1D tensor whose values are indices into the first
+ * dimension of @p input.
+ * Data types supported: S32.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLEmbeddingLookupKernel
+ * @param[in] input The input tensor info.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] output The output tensor info. Data types supported: same as @p input.
+ * @param[in] lookups Lookups info. Data types supported: S32.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *lookups);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /** Source tensor */
+ ICLTensor *_output; /** Destination tensor */
+ const ICLTensor *_lookups; /** Lookups tensor */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ */
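Usage sketch (illustrative, not part of the patch); the embedding table, output and S32 lookups tensors are assumed to be initialised as documented above.

    // Sketch: gather the rows of `table` selected by `lookups` into `output`.
    #include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_embedding_lookup(arm_compute::CLTensor &table, arm_compute::CLTensor &output,
                              arm_compute::CLTensor &lookups)
    {
      using namespace arm_compute;
      ARM_COMPUTE_ERROR_THROW_ON(
          CLEmbeddingLookupKernel::validate(table.info(), output.info(), lookups.info()));
      CLEmbeddingLookupKernel kernel;
      kernel.configure(&table, &output, &lookups);
      CLScheduler::get().enqueue(kernel);
    }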
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
new file mode 100644
index 000000000..aa81a1efa
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLGatherExKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLGatherExKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLGATHEREXKERNEL_H__
+#define __ARM_COMPUTE_CLGATHEREXKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define an interface for the gather kernel.
+ */
+class CLGatherExKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Construct CLGatherExKernel object
+ * */
+ CLGatherExKernel();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ */
+ CLGatherExKernel(const CLGatherExKernel &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ */
+ CLGatherExKernel &operator=(const CLGatherExKernel &) = delete;
+
+ /**
+ * @brief Construct CLGatherExKernel object by using default move constructor
+ * @param[in] CLGatherExKernel object to move
+ */
+ CLGatherExKernel(CLGatherExKernel &&) = default;
+
+ /**
+ * @brief Move assignment operator
+ * @param[in] CLGatherExKernel object to move
+ */
+ CLGatherExKernel &operator=(CLGatherExKernel &&) = default;
+
+ /**
+ * @brief Initialise the kernel's input, output and border mode.
+ * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] indices Indices tensor. Data types supported: S32.
+ * @param[out] output The output tensor. Data types supported: same as @p input.
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative
+ * values wrap around. Defaults to 0
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLGatherExKernel
+ * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] indices Indices tensor. Data types supported: S32.
+ * @param[out] output The output tensor. Data types supported: same as @p input.
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative
+ * values wrap around. Defaults to 0
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
+ const ITensorInfo *output, int axis = 0);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_indices;
+ ICLTensor *_output;
+ int _axis;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLGATHEREXKERNEL_H__ */
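Usage sketch (illustrative, not part of the patch); the tensors are assumed to be initialised, the indices are S32, and axis 1 is an arbitrary choice.

    // Sketch: gather along axis 1 using S32 indices.
    #include "arm_compute/core/CL/kernels/CLGatherExKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_gather(arm_compute::CLTensor &input, arm_compute::CLTensor &indices,
                    arm_compute::CLTensor &output)
    {
      using namespace arm_compute;
      ARM_COMPUTE_ERROR_THROW_ON(
          CLGatherExKernel::validate(input.info(), indices.info(), output.info(), /*axis=*/1));
      CLGatherExKernel kernel;
      kernel.configure(&input, &indices, &output, /*axis=*/1);
      CLScheduler::get().enqueue(kernel);
    }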
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
new file mode 100644
index 000000000..8269e5a7a
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLHashtableLookupKernel.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines CLHashtableLookupKernel class
+ */
+
+#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
+#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+* @brief Class to perform HashtableLookup operation with an OpenCL kernel
+*/
+class CLHashtableLookupKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Construct a CLHashtableLookupKernel object
+ * */
+ CLHashtableLookupKernel();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ * */
+ CLHashtableLookupKernel(const CLHashtableLookupKernel &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ * */
+ CLHashtableLookupKernel &operator=(const CLHashtableLookupKernel &) = delete;
+
+ /**
+ * @brief Construct a CLHashtableLookupKernel object by using default move constructor
+ * @param[in] CLHashtableLookupKernel object to move
+ * */
+ CLHashtableLookupKernel(CLHashtableLookupKernel &&) = default;
+
+ /**
+ * @brief Move assignment operator
+ * @param[in] CLHashtableLookupKernel object to move
+ * */
+ CLHashtableLookupKernel &operator=(CLHashtableLookupKernel &&) = default;
+
+ /**
+ * @brief Destruct this object
+ * */
+ ~CLHashtableLookupKernel() = default;
+
+ /**
+ * @brief Set the input and output of the kernel
+ * @param[in] lookups Lookups 1D tensor whose values are indices into the first dimension of
+ * @p input. Data types supported: S32.
+ * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
+ * (True) or not (False). Data types supported: U8/QASYMM8
+ * @return N/A
+ */
+ void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input,
+ ICLTensor *output, ICLTensor *hits);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLHashtableLookupKernel
+ * @param[in] lookups The lookups tensor info. Data types supported: S32.
+ * @param[in] keys The keys tensor info. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input The input tensor info.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output The output tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[out] hits The hits tensor info. A boolean tensor that indicates whether the lookup
+ * hits
+ * (True) or not (False). Data types supported: U8/QASYMM8
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys,
+ const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *hits);
+
+ /**
+ * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
+ * queue.
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have
+ * been executed by the time this method returns.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of
+ * the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_lookups{nullptr}; /** Lookups tensor */
+ const ICLTensor *_keys{nullptr}; /** Keys tensor */
+ const ICLTensor *_input{nullptr}; /** Source tensor */
+ ICLTensor *_output{nullptr}; /** Destination tensor */
+ ICLTensor *_hits{nullptr}; /** Hits tensor */
+ std::unique_ptr<CLTensor> _lookup_indices{nullptr}; /** Lookup indices tensor */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ */
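Usage sketch (illustrative, not part of the patch); all five tensors are assumed to be initialised with the data types documented above (S32 lookups/keys, U8/QASYMM8 hits).

    // Sketch: look up rows of `values` by key and record which lookups hit.
    #include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_hashtable_lookup(arm_compute::CLTensor &lookups, arm_compute::CLTensor &keys,
                              arm_compute::CLTensor &values, arm_compute::CLTensor &output,
                              arm_compute::CLTensor &hits)
    {
      using namespace arm_compute;
      ARM_COMPUTE_ERROR_THROW_ON(CLHashtableLookupKernel::validate(
          lookups.info(), keys.info(), values.info(), output.info(), hits.info()));
      CLHashtableLookupKernel kernel;
      kernel.configure(&lookups, &keys, &values, &output, &hits);
      CLScheduler::get().enqueue(kernel);
    }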
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h
new file mode 100644
index 000000000..f5e147e03
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
+#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for performing an instance normalization */
+class CLInstanceNormalizationLayerKernelEx : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLInstanceNormalizationLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLInstanceNormalizationLayerKernelEx(const CLInstanceNormalizationLayerKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLInstanceNormalizationLayerKernelEx &
+ operator=(const CLInstanceNormalizationLayerKernelEx &) = delete;
+ /** Default Move Constructor. */
+ CLInstanceNormalizationLayerKernelEx(CLInstanceNormalizationLayerKernelEx &&) = default;
+ /** Default move assignment operator */
+ CLInstanceNormalizationLayerKernelEx &
+ operator=(CLInstanceNormalizationLayerKernelEx &&) = default;
+ /** Default destructor */
+ ~CLInstanceNormalizationLayerKernelEx() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported:
+ * NCHW
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p
+ * input.
+ * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults
+ * to nullptr
+ * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. Defaults
+ * to nullptr
+ * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ */
+ void configure(ICLTensor *input, ICLTensor *output, ICLTensor *gamma = nullptr,
+ ICLTensor *beta = nullptr, float epsilon = 1e-12f);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLInstanceNormalizationLayerEx.
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor = nullptr this tensor will
+ * store the result of the normalization.
+ * Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p
+ * input.
+ * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults to
+ * nullptr
+ * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. Defaults to
+ * nullptr
+ * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr,
+ float epsilon = 1e-12f);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input;
+ ICLTensor *_output;
+ ICLTensor *_gamma;
+ ICLTensor *_beta;
+ float _epsilon;
+ bool _run_in_place;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__ */
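Usage sketch (illustrative, not part of the patch): the simplest configuration, with no gamma/beta tensors and the default epsilon; the F32 input and output tensors are assumed to be initialised in NCHW layout.

    // Sketch: instance normalization without scale/offset tensors.
    #include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_instance_norm(arm_compute::CLTensor &input, arm_compute::CLTensor &output)
    {
      arm_compute::CLInstanceNormalizationLayerKernelEx kernel;
      kernel.configure(&input, &output); // gamma/beta omitted, epsilon defaults to 1e-12
      arm_compute::CLScheduler::get().enqueue(kernel);
    }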
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
new file mode 100644
index 000000000..ccbea147e
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__
+#define __ARM_COMPUTE_CLNEGKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a negation operation on a tensor */
+class CLNegKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLNegKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLNegKernel(const CLNegKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLNegKernel &operator=(const CLNegKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLNegKernel(CLNegKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLNegKernel &operator=(CLNegKernel &&) = default;
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] input Source tensor.
+ * @param[out] output Destination tensor.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLNEGKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
new file mode 100644
index 000000000..eff1b8bd5
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__
+#define __ARM_COMPUTE_CLPRELU_KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to calculate PReLU */
+class CLPReLUKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPReLUKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLPReLUKernel(const CLPReLUKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLPReLUKernel &operator=(const CLPReLUKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPReLUKernel(CLPReLUKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPReLUKernel &operator=(CLPReLUKernel &&) = default;
+ /** Initialize the kernel's inputs and output.
+ *
+ * @param[in] input Source tensor.
+ * @param[in] alpha Alpha tensor (slope applied to negative input values).
+ * @param[out] output Output tensor.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_alpha;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
new file mode 100644
index 000000000..a26a4a7fc
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLReduceOperationKernel.h
+ * @brief This file defines CLReduceOperationKernel class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
+#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define interface for the reduce operation kernel
+ */
+class CLReduceOperationKernel : public ICLKernel
+{
+public:
+ /**
+ * @brief Default constructor
+ */
+ CLReduceOperationKernel();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLReduceOperationKernel(const CLReduceOperationKernel &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLReduceOperationKernel &operator=(const CLReduceOperationKernel &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ */
+ CLReduceOperationKernel(CLReduceOperationKernel &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ */
+ CLReduceOperationKernel &operator=(CLReduceOperationKernel &&) = default;
+ /**
+ * @brief Default destructor
+ */
+ ~CLReduceOperationKernel() = default;
+
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] input Source tensor. Data types supported: U8/S32/F32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce.
+ * @param[in] op Reduce operation to perform.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis,
+ ReduceOperation op);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLReduceOperationKernel.
+ * @param[in] input Source tensor info. Data types supported: U8/S32/F32.
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce.
+ * @param[in] op Reduce operation to perform.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
+ ReduceOperation op);
+
+ /**
+ * @brief Run CLReduceOperationKernel op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ uint32_t _axis;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ */
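Usage sketch (illustrative, not part of the patch); ReduceOperation::MEAN is assumed to be a TypesEx.h enumerator, axis 2 is arbitrary, and the output tensor keeps the input rank as documented above.

    // Sketch: mean-reduce along axis 2.
    #include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_reduce_mean(arm_compute::CLTensor &input, arm_compute::CLTensor &output)
    {
      using namespace arm_compute;
      ARM_COMPUTE_ERROR_THROW_ON(CLReduceOperationKernel::validate(
          input.info(), output.info(), 2, ReduceOperation::MEAN)); // assumed enumerator
      CLReduceOperationKernel kernel;
      kernel.configure(&input, &output, 2, ReduceOperation::MEAN);
      CLScheduler::get().enqueue(kernel);
    }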
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
new file mode 100644
index 000000000..577e38cc4
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
+#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform SPACE_TO_BATCH_ND operation */
+class CLSpaceToBatchNDKernel final : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLSpaceToBatchNDKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToBatchNDKernel(const CLSpaceToBatchNDKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToBatchNDKernel &operator=(const CLSpaceToBatchNDKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSpaceToBatchNDKernel(CLSpaceToBatchNDKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSpaceToBatchNDKernel &operator=(CLSpaceToBatchNDKernel &&) = default;
+ /** Default destructor */
+ ~CLSpaceToBatchNDKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @note The data layout of input and output must be the same.
+ * @note The number of dimensions of input and output must be 4, and `spatial` dimensions
+ * are height and width.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
+ * Data layout supported: NCHW/NHWC
+ * @param[in] block_size Block size tensor. Data types supported: S32.
+ * @param[in] padding_size Padding size tensor. Data types supported: S32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
+ * Data layout supported: NCHW/NHWC
+ */
+ void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size,
+ ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input{nullptr}; /**< Source tensor */
+ const ICLTensor *_block_size{nullptr}; /**< Block size tensor */
+ const ICLTensor *_padding_size{nullptr}; /**< Padding size tensor */
+ ICLTensor *_output{nullptr}; /**< Destination tensor */
+};
+
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */
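Usage sketch (illustrative, not part of the patch): block sizes and paddings are passed as S32 tensors, matching the configure() signature above; all tensors are assumed to be initialised elsewhere.

    // Sketch: SPACE_TO_BATCH_ND with block sizes and paddings supplied as S32 tensors.
    #include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    void run_space_to_batch(arm_compute::CLTensor &input, arm_compute::CLTensor &block_size,
                            arm_compute::CLTensor &padding_size, arm_compute::CLTensor &output)
    {
      arm_compute::CLSpaceToBatchNDKernel kernel;
      kernel.configure(&input, &block_size, &padding_size, &output);
      arm_compute::CLScheduler::get().enqueue(kernel);
    }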
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
new file mode 100644
index 000000000..be845a549
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
+#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a SpaceToDepth operation */
+class CLSpaceToDepthKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLSpaceToDepthKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default;
+ /** Default destructor */
+ ~CLSpaceToDepthKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size Block size used to rearrange spatial blocks into depth values.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
new file mode 100644
index 000000000..8da2daecc
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
@@ -0,0 +1,657 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLTopKV2Kernel.h
+ * @brief This file defines classes for TopKV2Kernel
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__
+#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+// these parameters can be changed
+#define _ITEMS 16 // number of items in a group
+#define _GROUPS 4 // the number of virtual processors is _ITEMS * _GROUPS
+#define _HISTOSPLIT (_ITEMS * _GROUPS / 2) // number of splits of the histogram
+#define PERMUT // store the final permutation
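+// With the default values above, _ITEMS * _GROUPS = 64 virtual processors and _HISTOSPLIT = 32;
+// this note only restates the arithmetic of the macros for readability.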
+////////////////////////////////////////////////////////
+
+// Disable GPU implementation
+// TODO Enable GPU implementation with verification, or remove code
+// Invalid result on GPU
+#if 0
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to define CLTopKV2Single
+ */
+class CLTopKV2Single : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2Single();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied
+ */
+ CLTopKV2Single(const CLTopKV2Single &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2Single &operator=(const CLTopKV2Single &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved
+ */
+ CLTopKV2Single(CLTopKV2Single &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2Single &operator=(CLTopKV2Single &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[in] input An input tensor
+   * @param[out] topk_values Values of the top k predictions
+   * @param[out] topk_indices Indices of the top k predictions
+   * @param[in] indices Buffer of indices
+   * @param[in] temp_stack Temporary stack buffer used by the sort
+   * @param[in] k K of the top k predictions
+   * @param[in] n Number of elements to sort
+   * @return N/A
+ */
+ void configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices,
+ cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n);
+
+  /**
+ * @brief Run CLTopKV2Single op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input;
+ ICLTensor *_topk_values;
+ ICLTensor *_topk_indices;
+};
+
+/**
+ * @brief Class to define CLTopKV2Init
+ */
+class CLTopKV2Init : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2Init();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied
+ */
+ CLTopKV2Init(const CLTopKV2Init &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2Init &operator=(const CLTopKV2Init &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved
+ */
+ CLTopKV2Init(CLTopKV2Init &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2Init &operator=(CLTopKV2Init &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[in] input An input tensor
+ * @param[in] in_key_buf Buffer of input key
+ * @param[in] in_ind_buf Buffer of input index
+   * @param[in] n Number of elements to initialise
+   * @return N/A
+ */
+ void configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, int n);
+
+  /**
+ * @brief Run CLTopKV2Init op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input;
+};
+
+/**
+ * @brief Class to define CLRadixSortHistogram
+ */
+class CLRadixSortHistogram : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortHistogram();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied
+ */
+ CLRadixSortHistogram(const CLRadixSortHistogram &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortHistogram &operator=(const CLRadixSortHistogram &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved
+ */
+ CLRadixSortHistogram(CLRadixSortHistogram &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortHistogram &operator=(CLRadixSortHistogram &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] hist_buf Buffer of histogram
+ * @param[in] bits Number of bits to be used for radix sort
+   * @param[in] n Number of elements to sort
+   * @return N/A
+ */
+ void configure(cl::Buffer *hist_buf, int bits, int n);
+
+ /**
+ * @brief Set pass
+   * @param[in] pass Current pass of the radix sort algorithm
+   * @param[in] in_key_buf Buffer of input key
+   * @return N/A
+ */
+ void setPass(int pass, cl::Buffer *in_key_buf)
+ {
+ _pass = pass;
+ _in_key_buf = in_key_buf;
+ }
+
+  /**
+ * @brief Run CLRadixSortHistogram op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ int _pass;
+ cl::Buffer *_in_key_buf;
+};
+
+/**
+ * @brief Class to define CLRadixSortScanHistogram
+ */
+class CLRadixSortScanHistogram : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortScanHistogram();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied
+ */
+ CLRadixSortScanHistogram(const CLRadixSortScanHistogram &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortScanHistogram &operator=(const CLRadixSortScanHistogram &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved
+ */
+ CLRadixSortScanHistogram(CLRadixSortScanHistogram &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortScanHistogram &operator=(CLRadixSortScanHistogram &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] hist_buf Buffer of histogram
+ * @param[out] glob_sum_buf Buffer of global sum
+ * @param[in] bits Number of bits to be used for radix sort
+   * @return N/A
+ */
+ void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);
+
+  /**
+ * @brief Run CLRadixSortScanHistogram op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/**
+ * @brief Class to define CLRadixSortGlobalScanHistogram
+ */
+class CLRadixSortGlobalScanHistogram : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortGlobalScanHistogram();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied
+ */
+ CLRadixSortGlobalScanHistogram(const CLRadixSortGlobalScanHistogram &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortGlobalScanHistogram &operator=(const CLRadixSortGlobalScanHistogram &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved
+ */
+ CLRadixSortGlobalScanHistogram(CLRadixSortGlobalScanHistogram &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortGlobalScanHistogram &operator=(CLRadixSortGlobalScanHistogram &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] glob_sum_buf Buffer of global sum
+   * @param[out] temp_buf Temporary buffer used during the global histogram scan
+   * @param[in] bits Number of bits to be used for radix sort
+   * @return N/A
+ */
+ void configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, int bits);
+
+  /**
+ * @brief Run CLRadixSortGlobalScanHistogram op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/**
+ * @brief Class to define CLRadixSortPasteHistogram
+ */
+class CLRadixSortPasteHistogram : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortPasteHistogram();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied
+ */
+ CLRadixSortPasteHistogram(const CLRadixSortPasteHistogram &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortPasteHistogram &operator=(const CLRadixSortPasteHistogram &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved
+ */
+ CLRadixSortPasteHistogram(CLRadixSortPasteHistogram &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortPasteHistogram &operator=(CLRadixSortPasteHistogram &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] hist_buf Buffer of histogram
+ * @param[out] glob_sum_buf Buffer of global sum
+ * @param[in] bits Number of bits to be used for radix sort
+   * @return N/A
+ */
+ void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);
+
+  /**
+ * @brief Run CLRadixSortPasteHistogram op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/**
+ * @brief Class to define CLRadixSortReorder
+ */
+class CLRadixSortReorder : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLRadixSortReorder();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied
+ */
+ CLRadixSortReorder(const CLRadixSortReorder &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied
+ * @return Reference of this instance
+ */
+ CLRadixSortReorder &operator=(const CLRadixSortReorder &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved
+ */
+ CLRadixSortReorder(CLRadixSortReorder &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved
+ * @return Reference of this instance
+ */
+ CLRadixSortReorder &operator=(CLRadixSortReorder &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] hist_buf Buffer of histogram
+ * @param[in] bits Number of bits to be used for radix sort
+   * @param[in] n Number of elements to sort
+   * @return N/A
+ */
+ void configure(cl::Buffer *hist_buf, int bits, int n);
+
+ /**
+ * @brief Set pass
+   * @param[in] pass Current pass of the radix sort algorithm
+   * @param[in] in_key_buf Buffer of input key
+   * @param[out] out_key_buf Buffer of output key
+   * @param[in] in_ind_buf Buffer of input index
+   * @param[out] out_ind_buf Buffer of output index
+   * @return N/A
+ */
+ void setPass(int pass, cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
+ cl::Buffer *out_ind_buf)
+ {
+ _pass = pass;
+ _in_key_buf = in_key_buf;
+ _out_key_buf = out_key_buf;
+ _in_ind_buf = in_ind_buf;
+ _out_ind_buf = out_ind_buf;
+ }
+  /**
+ * @brief Run CLRadixSortReorder op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ int _pass;
+ cl::Buffer *_in_key_buf;
+ cl::Buffer *_out_key_buf;
+ cl::Buffer *_in_ind_buf;
+ cl::Buffer *_out_ind_buf;
+};
+
+/**
+ * @brief Class to define CLTopKV2FindFirstNegative
+ */
+class CLTopKV2FindFirstNegative : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2FindFirstNegative();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied
+ */
+ CLTopKV2FindFirstNegative(const CLTopKV2FindFirstNegative &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2FindFirstNegative &operator=(const CLTopKV2FindFirstNegative &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved
+ */
+ CLTopKV2FindFirstNegative(CLTopKV2FindFirstNegative &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2FindFirstNegative &operator=(CLTopKV2FindFirstNegative &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] first_negative_idx_buf Buffer of the first negative index
+   * @param[in] n Number of elements to scan
+   * @return N/A
+ */
+ void configure(cl::Buffer *first_negative_idx_buf, int n);
+
+ /**
+ * @brief Set output buffer
+ * @param[out] out_key_buf Buffer of output key
+   * @return N/A
+ */
+ void setOutputBuffer(cl::Buffer *out_key_buf) { _out_key_buf = out_key_buf; }
+
+  /**
+ * @brief Run CLTopKV2FindFirstNegative op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ cl::Buffer *_out_key_buf;
+};
+
+/**
+ * @brief Class to define CLTopKV2ReorderNegatives
+ */
+class CLTopKV2ReorderNegatives : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2ReorderNegatives();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied
+ */
+ CLTopKV2ReorderNegatives(const CLTopKV2ReorderNegatives &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2ReorderNegatives &operator=(const CLTopKV2ReorderNegatives &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved
+ */
+ CLTopKV2ReorderNegatives(CLTopKV2ReorderNegatives &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2ReorderNegatives &operator=(CLTopKV2ReorderNegatives &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] first_negative_idx_buf Buffer of the first negative index
+   * @param[in] n Number of elements to reorder
+   * @return N/A
+ */
+ void configure(cl::Buffer *first_negative_idx_buf, int n);
+
+ /**
+ * @brief Set buffers
+ * @param[in] in_key_buf Buffer of input key
+ * @param[out] out_key_buf Buffer of output key
+ * @param[in] in_ind_buf Buffer of input index
+ * @param[out] out_ind_buf Buffer of output index
+   * @return N/A
+ */
+ void setBuffers(cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
+ cl::Buffer *out_ind_buf)
+ {
+ _in_key_buf = in_key_buf;
+ _out_key_buf = out_key_buf;
+ _in_ind_buf = in_ind_buf;
+ _out_ind_buf = out_ind_buf;
+ }
+
+  /**
+ * @brief Run CLTopKV2ReorderNegatives op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ cl::Buffer *_in_key_buf;
+ cl::Buffer *_out_key_buf;
+ cl::Buffer *_in_ind_buf;
+ cl::Buffer *_out_ind_buf;
+};
+
+/**
+ * @brief Class to define CLTopKV2Store
+ */
+class CLTopKV2Store : public ICLKernel
+{
+public:
+ /**
+ * @brief Constructor
+ */
+ CLTopKV2Store();
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied
+ */
+ CLTopKV2Store(const CLTopKV2Store &) = delete;
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers).
+ * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied
+ * @return Reference of this instance
+ */
+ CLTopKV2Store &operator=(const CLTopKV2Store &) = delete;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved
+ */
+ CLTopKV2Store(CLTopKV2Store &&) = default;
+ /**
+ * @brief Allow instances of this class to be moved
+ * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved
+ * @return Reference of this instance
+ */
+ CLTopKV2Store &operator=(CLTopKV2Store &&) = default;
+
+ /**
+ * @brief Initialise kernel with params
+ * @param[out] values Values tensor to store
+   * @param[out] indices Indices tensor to store
+   * @param[in] k K of the top k predictions
+   * @param[in] n Number of elements
+   * @return N/A
+ */
+ void configure(ICLTensor *values, ICLTensor *indices, int k, int n);
+
+ /**
+ * @brief Set buffers
+ * @param[out] out_key_buf Buffer of output key
+ * @param[out] out_ind_buf Buffer of output index
+   * @return N/A
+ */
+ void setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf);
+
+  /**
+ * @brief Run CLTopKV2Store op
+ * @param[in] window Window to be used for in_slice
+ * @param[in] queue cl::CommandQueue
+ * @return N/A
+ */
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_values;
+ ICLTensor *_indices;
+ cl::Buffer *_out_key_buf;
+ cl::Buffer *_out_ind_buf;
+};
+
+} // namespace arm_compute
+#endif // Disable GPU implementation
+#endif // __ARM_COMPUTE_CLTOPKV2KERNEL_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h
new file mode 100644
index 000000000..c5ef730b6
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL.
+ */
+class CLTransposeConvLayerUpsampleKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLTransposeConvLayerUpsampleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayerUpsampleKernel &
+ operator=(const CLTransposeConvLayerUpsampleKernel &) = delete;
+ /** Default Move Constructor. */
+ CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default;
+ /** Default move assignment operator */
+ CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default;
+ /** Default destructor */
+ ~CLTransposeConvLayerUpsampleKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input. All but
+ * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only
+ * performed within the XY-plane.
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be
+ * filled with zero.
+ * @param[in] info Contains padding and stride information described in @ref
+ * PadStrideInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+ const PadStrideInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLTransposeConvLayerUpsample
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: same as @p input. All
+ * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is
+ * only performed within the XY-plane.
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled
+ * with zero.
+ * @param[in] info Contains padding and stride information described in @ref
+ * PadStrideInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const BorderSize &inner_border, const PadStrideInfo &info);
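+
+  /* Usage sketch added for illustration (hedged, not from the original docs): validate() can be
+   * used to pre-check a candidate configuration before configure(). The shapes, border and
+   * stride values below are placeholders and may need adjusting to pass validation.
+   *
+   *   CLTensor src, dst;
+   *   src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
+   *   dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
+   *   const PadStrideInfo info(2, 2, 0, 0); // stride_x, stride_y, pad_x, pad_y
+   *
+   *   Status s = CLTransposeConvLayerUpsampleKernel::validate(src.info(), dst.info(),
+   *                                                           BorderSize(0), info);
+   *   if(s.error_code() == ErrorCode::OK)
+   *   {
+   *     src.allocator()->allocate();
+   *     dst.allocator()->allocate();
+   *     CLTransposeConvLayerUpsampleKernel kernel;
+   *     kernel.configure(&src, &dst, BorderSize(0), info);
+   *     CLScheduler::get().enqueue(kernel);
+   *   }
+   */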
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ BorderSize _inner_border;
+ PadStrideInfo _info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
new file mode 100644
index 000000000..d093c22cb
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
+#define __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
+
+#include "arm_compute/core/CPP/ICPPKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** CPP kernel to perform tensor upsample.
+ *
+ */
+class CPPUpsampleKernelEx : public ICPPKernel
+{
+public:
+ const char *name() const override { return "CPPUpsampleKernelEx"; }
+ /** Default constructor */
+ CPPUpsampleKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CPPUpsampleKernelEx(const CPPUpsampleKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CPPUpsampleKernelEx &operator=(const CPPUpsampleKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ CPPUpsampleKernelEx(CPPUpsampleKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ CPPUpsampleKernelEx &operator=(CPPUpsampleKernelEx &&) = default;
+ /** Default destructor */
+ ~CPPUpsampleKernelEx() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+ * @param[out] output The output tensor. Data types supported: Same as @p input
+   * @param[in]  info   Padding and stride information.
+ */
+ void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
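+
+  /* Usage sketch added for illustration (a hedged assumption, not from the original docs): the
+   * kernel is normally owned by a runtime function; here it is configured directly and
+   * dispatched through the generic CPU scheduler. Shapes and strides are placeholders.
+   *
+   *   Tensor src, dst;
+   *   src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U), 1, DataType::F32));
+   *   dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
+   *   src.allocator()->allocate();
+   *   dst.allocator()->allocate();
+   *
+   *   CPPUpsampleKernelEx kernel;
+   *   kernel.configure(&src, &dst, PadStrideInfo(2, 2, 0, 0));
+   *   Scheduler::get().schedule(&kernel, Window::DimY);
+   */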
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ bool is_parallelisable() const override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ PadStrideInfo _info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h
new file mode 100644
index 000000000..358e0ebc6
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__
+#define __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+class ITensor;
+class Window;
+class QuantizationInfo;
+} // namespace arm_compute
+
+namespace arm_compute
+{
+
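+/* Note added for readability (an assumption about the convention, not a statement from the
+ * original header): these helpers appear to follow the usual ACL affine quantization scheme,
+ * where a QASYMM8 value q with quantization parameters (scale, offset) dequantizes as
+ *
+ *   real = scale * (static_cast<float>(q) - static_cast<float>(offset));
+ *
+ * and store_quantized() is assumed to apply the inverse mapping using 1 / scale. */
+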
+float32x4x4_t load_quantized(const uint8_t *input1_ptr, const int32x4_t &offset,
+ const float32x4_t &scale);
+
+void store_quantized(uint8_t *output_ptr, const float32x4x4_t &rf, const float32x4_t &offset,
+ const float32x4_t &invscale);
+
+float32x4x4_t dup_quantized(uint8_t broadcast_value, int offset, float scale);
+
+void elementwise_op_quantized(
+ const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
+ uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo),
+ int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t,
+ float32x4_t, float32x4_t, float32x4_t, const bool),
+ int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *, int32x4_t,
+ int32x4_t, float32x4_t, float32x4_t, float32x4_t, float32x4_t));
+
+void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
+ float (*scalar_func)(const float &, const float &),
+ int (*broadcast_func)(int, int, int, const float *, const float &, float *,
+ const bool),
+ int (*neon_func)(int, int, int, const float *, const float *, float *));
+
+void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
+ uint8_t (*scalar_func)(const uint8_t &, const uint8_t &),
+ int (*broadcast_func)(int, int, int, const uint8_t *, const uint8_t &,
+ uint8_t *, const bool),
+ int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *));
+} // namespace arm_compute
+#endif // __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
new file mode 100644
index 000000000..61992bd50
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
+#define __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
+
+#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+
+class NEBinaryLogicalOperationKernel : public NEElementwiseOperationKernel
+{
+public:
+ /** Default destructor */
+ ~NEBinaryLogicalOperationKernel() = default;
+
+  /** Initialise the kernel's inputs and output.
+   *
+   * @param[in]  op     Binary logical operation to be executed.
+   * @param[in]  input1 First tensor input. Data types supported: QASYMM8/U8.
+   * @param[in]  input2 Second tensor input. Data types supported: Same as @p input1.
+   * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(BinaryLogicalOperation op, const ITensor *input1, const ITensor *input2,
+ ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] op Binary logical operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a Status
+ */
+ static Status validate(BinaryLogicalOperation op, const ITensorInfo *input1,
+ const ITensorInfo *input2, const ITensorInfo *output);
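+
+  /* Usage sketch added for illustration (hedged): the enumerator below comes from TypesEx and
+   * its exact name is an assumption. U8 tensors are used so no quantization info is required.
+   *
+   *   Tensor a, b, out;
+   *   const TensorInfo info(TensorShape(16U), 1, DataType::U8);
+   *   a.allocator()->init(info);
+   *   b.allocator()->init(info);
+   *   out.allocator()->init(info);
+   *   a.allocator()->allocate();
+   *   b.allocator()->allocate();
+   *   out.allocator()->allocate();
+   *
+   *   NEBinaryLogicalOperationKernel kernel;
+   *   kernel.configure(BinaryLogicalOperation::AND, &a, &b, &out); // enumerator name assumed
+   *   NEScheduler::get().schedule(&kernel, Window::DimY);
+   */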
+
+protected:
+ // Inherited methods overridden:
+ static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
+ const ITensorInfo &output);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h
new file mode 100644
index 000000000..fd2a2ee3b
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECASTKERNEL_H__
+#define __ARM_COMPUTE_NECASTKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the cast layer kernel. */
+class NECastKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NECastKernel"; }
+ /** Default constructor */
+ NECastKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECastKernel(const NECastKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECastKernel &operator=(const NECastKernel &) = delete;
+ /** Default Move Constructor. */
+ NECastKernel(NECastKernel &&) = default;
+ /** Default move assignment operator */
+ NECastKernel &operator=(NECastKernel &&) = default;
+ /** Default destructor */
+ ~NECastKernel() = default;
+ /** Set input, output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U32/S32/F32.
+   * @param[out] output Destination tensor with the same dimensions as the input. Data type supported:
+ * U8/S8/QASYMM8/U32/S32/F32.
+ * @param[in] input_subtype Sub data type of input.
+ */
+ void configure(const ITensor *input, ITensor *output, SubDataType input_subtype);
+ /** Static function to check if given info will lead to a valid configuration of @ref NECastKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
+ * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
+ * @param[in] input_subtype Sub data type of input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ SubDataType input_subtype);
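+
+  /* Usage sketch added for illustration (hedged): casts an F32 tensor to S32. The SubDataType
+   * enumerator name is an assumption taken from TypesEx; substitute the appropriate value.
+   *
+   *   Tensor src, dst;
+   *   src.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
+   *   dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));
+   *   src.allocator()->allocate();
+   *   dst.allocator()->allocate();
+   *
+   *   NECastKernel kernel;
+   *   kernel.configure(&src, &dst, SubDataType::NONE); // enumerator name assumed
+   *   NEScheduler::get().schedule(&kernel, Window::DimY);
+   */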
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ SubDataType _input_subtype;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NECASTKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h
new file mode 100644
index 000000000..5b6ef6bfb
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
+#define __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the depth to space kernel */
+class NEDepthToSpaceLayerKernelEx : public INEKernel
+{
+public:
+ const char *name() const override { return "NEDepthToSpaceLayerKernelEx"; }
+ /** Default constructor */
+ NEDepthToSpaceLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthToSpaceLayerKernelEx(const NEDepthToSpaceLayerKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthToSpaceLayerKernelEx &operator=(const NEDepthToSpaceLayerKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDepthToSpaceLayerKernelEx(NEDepthToSpaceLayerKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDepthToSpaceLayerKernelEx &operator=(NEDepthToSpaceLayerKernelEx &&) = default;
+ /** Default destructor */
+ ~NEDepthToSpaceLayerKernelEx() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+   * @param[in]  block_shape Block shape value.
+ */
+ void configure(const ITensor *input, ITensor *output, int32_t block_shape);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEDepthToSpaceLayerKernelEx.
+ *
+ * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] output Tensor output info. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
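+
+  /* Usage sketch added for illustration (hedged, not from the original docs): with
+   * block_shape = 2, a 2x2x8 input is expected to map onto a 4x4x2 output; shapes are
+   * placeholders (a batch dimension may be required) and should be checked with validate().
+   *
+   *   Tensor src, dst;
+   *   src.allocator()->init(TensorInfo(TensorShape(2U, 2U, 8U), 1, DataType::F32));
+   *   dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 2U), 1, DataType::F32));
+   *   src.allocator()->allocate();
+   *   dst.allocator()->allocate();
+   *
+   *   NEDepthToSpaceLayerKernelEx kernel;
+   *   kernel.configure(&src, &dst, 2);
+   *   NEScheduler::get().schedule(&kernel, Window::DimY);
+   */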
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+ int32_t _block_shape; /**< Block shape */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h
new file mode 100644
index 000000000..d6fad1155
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__
+#define __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for an element-wise unary operation kernel
+ *
+ * Element-wise operation is computed by:
+ * @f[ output(x) = OP(input(x))@f]
+ *
+ */
+class NEElementwiseUnaryKernelEx : public INEKernel
+{
+public:
+ const char *name() const override { return "NEElementwiseUnaryKernelEx"; }
+ /** Default constructor */
+ NEElementwiseUnaryKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseUnaryKernelEx(const NEElementwiseUnaryKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEElementwiseUnaryKernelEx &operator=(const NEElementwiseUnaryKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ NEElementwiseUnaryKernelEx(NEElementwiseUnaryKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ NEElementwiseUnaryKernelEx &operator=(NEElementwiseUnaryKernelEx &&) = default;
+ /** Default destructor */
+ ~NEElementwiseUnaryKernelEx() = default;
+
+  /** Initialise the kernel's input and output.
+   *
+   * @param[in]  op     Element-wise unary operation to be executed.
+   * @param[in]  input  First tensor input. Data types supported: F16/F32/S32.
+   * @param[out] output Output tensor. Data types supported: Same as @p input.
+ */
+ void configure(ElementWiseUnaryEx op, const ITensor *input, ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEElementwiseUnaryKernelEx
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a Status
+ */
+ static Status validate(ElementWiseUnaryEx op, const ITensorInfo *input,
+ const ITensorInfo *output);
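+
+  /* Usage sketch added for illustration (hedged): the ElementWiseUnaryEx enumerator name below
+   * is an assumption taken from TypesEx; shapes are placeholders.
+   *
+   *   Tensor src, dst;
+   *   src.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
+   *   dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
+   *   src.allocator()->allocate();
+   *   dst.allocator()->allocate();
+   *
+   *   NEElementwiseUnaryKernelEx kernel;
+   *   kernel.configure(ElementWiseUnaryEx::NEG, &src, &dst); // enumerator name assumed
+   *   NEScheduler::get().schedule(&kernel, Window::DimY);
+   */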
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+ /** Common signature for all the specialised arithmetic functions
+ *
+ * @param[in] input An input tensor. Data types supported: F16/F32/S32.
+ * @param[out] output The output tensor. Data types supported: Same as @p input.
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using ElementwiseUnaryFunction = void(const ITensor *input, ITensor *output,
+ const Window &window);
+
+protected:
+ // Inherited methods overridden:
+ static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output);
+
+ /** Function to use for the particular tensor types passed to configure() */
+ std::function<void(const ITensor *input, ITensor *output, const Window &window)> _function;
+
+ const ITensor *_input;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h
new file mode 100644
index 000000000..1490e75f2
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
+#define __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform EmbeddingLookup operation */
+class NEEmbeddingLookupKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NEEmbeddingLookupKernel"; }
+ /** Default constructor */
+ NEEmbeddingLookupKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEEmbeddingLookupKernel(const NEEmbeddingLookupKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEEmbeddingLookupKernel &operator=(const NEEmbeddingLookupKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEEmbeddingLookupKernel(NEEmbeddingLookupKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEEmbeddingLookupKernel &operator=(NEEmbeddingLookupKernel &&) = default;
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+   * @param[in]  lookups Lookups 1D tensor whose values are indices into the first dimension of
+   *                     input. Data types supported: S32.
+ */
+ void configure(const ITensor *input, ITensor *output, const ITensor *lookups);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEEmbeddingLookupKernel
+ *
+ * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] lookups Lookups info. Data types supported: S32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *lookups);
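+
+  /* Usage sketch added for illustration (hedged): lookups is a 1D S32 tensor of row indices;
+   * shapes are placeholders and assume lookups index the outermost dimension of the input.
+   *
+   *   Tensor input, lookups, output;
+   *   input.allocator()->init(TensorInfo(TensorShape(4U, 8U), 1, DataType::F32));  // 8 rows of 4
+   *   lookups.allocator()->init(TensorInfo(TensorShape(3U), 1, DataType::S32));    // 3 indices
+   *   output.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32)); // 3 rows of 4
+   *   input.allocator()->allocate();
+   *   lookups.allocator()->allocate();
+   *   output.allocator()->allocate();
+   *   // ... fill lookups with values in [0, 8) ...
+   *
+   *   NEEmbeddingLookupKernel kernel;
+   *   kernel.configure(&input, &output, &lookups);
+   *   NEScheduler::get().schedule(&kernel, Window::DimY);
+   */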
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input;
+ const ITensor *_lookups;
+ ITensor *_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
new file mode 100644
index 000000000..3fa9c6e9a
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEGATHERKERNELEX_H__
+#define __ARM_COMPUTE_NEGATHERKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Kernel to perform other operation on NEON */
+class NEGatherKernelEx : public INEKernel
+{
+public:
+ /** Default constructor. */
+ NEGatherKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEGatherKernelEx(const NEGatherKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEGatherKernelEx &operator=(const NEGatherKernelEx &) = delete;
+ /** Allow instances of this class to be moved. */
+ NEGatherKernelEx(NEGatherKernelEx &&) = default;
+ /** Allow instances of this class to be moved. */
+ NEGatherKernelEx &operator=(NEGatherKernelEx &&) = default;
+  /** Default destructor */
+ ~NEGatherKernelEx() = default;
+
+ /** Name of the kernel
+ *
+ * @return Kernel name
+ */
+ const char *name() const override { return "NEGatherKernelEx"; }
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported:
+ * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+   * following type: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values
+ * wrap around. Defaults to 0
+ */
+ void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEGatherKernelEx
+ *
+ * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported:
+ * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
+   * following type: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values
+ * wrap around. Defaults to 0
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
+ const ITensorInfo *output, int axis);
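+
+  /* Usage sketch added for illustration (hedged): gathers rows of a 2D tensor along axis 0;
+   * shapes and the axis convention are placeholders and should be checked against validate().
+   *
+   *   Tensor input, indices, output;
+   *   input.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
+   *   indices.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::S32));
+   *   output.allocator()->init(TensorInfo(TensorShape(2U, 4U), 1, DataType::F32));
+   *   input.allocator()->allocate();
+   *   indices.allocator()->allocate();
+   *   output.allocator()->allocate();
+   *   // ... fill indices with values in [0, 4) ...
+   *
+   *   NEGatherKernelEx kernel;
+   *   kernel.configure(&input, &indices, &output, 0);
+   *   NEScheduler::get().schedule(&kernel, Window::DimY);
+   */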
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Implementation of the gather operation for 0 axis.
+ *
+ * For gather on the 0 axis an element by element copy is performed.
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a region of the window
+ * returned by window())
+ * @param[in] info Info about executing thread and CPU.
+ */
+ template <typename U> void gather_0_axis(const Window &window, const ThreadInfo &info);
+
+ /** Implementation of the gather operation.
+ *
+   * For axis >= 1, a row-wise copy takes place.
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a region of the window
+ * returned by window())
+ * @param[in] info Info about executing thread and CPU.
+ */
+ template <typename U> void gather_n_axis(const Window &window, const ThreadInfo &info);
+
+ using kernel_ptr = void (NEGatherKernelEx::*)(const Window &window, const ThreadInfo &info);
+
+ const ITensor *_input;
+ const ITensor *_indices;
+ int _axis;
+ ITensor *_output;
+ kernel_ptr _func;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEGATHERKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h
new file mode 100644
index 000000000..d8976e7d0
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
+#define __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform HashtableLookup operation */
+class NEHashtableLookupKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NEHashtableLookupKernel"; }
+ /** Default constructor */
+ NEHashtableLookupKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEHashtableLookupKernel(const NEHashtableLookupKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ NEHashtableLookupKernel &operator=(const NEHashtableLookupKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEHashtableLookupKernel(NEHashtableLookupKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEHashtableLookupKernel &operator=(NEHashtableLookupKernel &&) = default;
+ /** Initialize the kernel's inputs and outputs.
+ *
+ * @param[in] lookups Lookups 1D tensor whose values are indices into the first dimension of
+ * input. Data types supported: S32
+ * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
+ * (True) or not (False). Data types supported: U8/QASYMM8
+ */
+ void configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, ITensor *output,
+ ITensor *hits);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEHashtableLookupKernel
+ *
+ * @param[in] lookups The lookups tensor info. Data types supported: S32.
+ * @param[in] keys The keys tensor info. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input The input tensor info.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] output The output tensor info. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] hits The hits tensor info. A boolean tensor that indicates whether the lookup
+ * hits (True) or not (False). Data types supported: U8/QASYMM8
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys,
+ const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *hits);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_lookups; /**< Lookups tensor */
+ const ITensor *_keys; /**< Keys tensor */
+ const ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+ ITensor *_hits; /**< Hits tensor */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__ */
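As a mental model of the lookup (hedged: this mirrors the usual HashtableLookup semantics, not the kernel's verified code), each lookup value is searched for in keys; on a hit the matching row of input is copied to output and the corresponding hits flag is set, and on a miss the output row is left zeroed. A standalone sketch under those assumptions, with illustrative names throughout:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Assumed semantics: keys[k] labels row k of a [keys.size() x row_len] table;
    // misses produce a zero row and hits[i] == 0.
    static void hashtable_lookup(const std::vector<int> &lookups, const std::vector<int> &keys,
                                 const std::vector<float> &table, int row_len,
                                 std::vector<float> &output, std::vector<unsigned char> &hits)
    {
      output.assign(lookups.size() * row_len, 0.0f);
      hits.assign(lookups.size(), 0);
      for (size_t i = 0; i < lookups.size(); ++i)
        for (size_t k = 0; k < keys.size(); ++k)
          if (keys[k] == lookups[i])
          {
            std::copy(table.begin() + k * row_len, table.begin() + (k + 1) * row_len,
                      output.begin() + i * row_len);
            hits[i] = 1;
            break;
          }
    }

    int main()
    {
      std::vector<float> out;
      std::vector<unsigned char> hits;
      hashtable_lookup({7, 3}, {3, 5, 7}, {10, 11, 20, 21, 30, 31}, 2, out, hits);
      for (float v : out) std::printf("%g ", v); // prints: 30 31 10 11
      std::printf("| hits: %d %d\n", hits[0], hits[1]);
    }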
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h
new file mode 100644
index 000000000..76e2587af
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
+#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for performing an instance normalization */
+class NEInstanceNormalizationLayerKernelEx : public INEKernel
+{
+public:
+ const char *name() const override { return "NEInstanceNormalizationLayerKernelEx"; }
+ /** Default constructor */
+ NEInstanceNormalizationLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEInstanceNormalizationLayerKernelEx(const NEInstanceNormalizationLayerKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEInstanceNormalizationLayerKernelEx &
+ operator=(const NEInstanceNormalizationLayerKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ NEInstanceNormalizationLayerKernelEx(NEInstanceNormalizationLayerKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ NEInstanceNormalizationLayerKernelEx &
+ operator=(NEInstanceNormalizationLayerKernelEx &&) = default;
+ /** Default destructor */
+ ~NEInstanceNormalizationLayerKernelEx() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported:
+ * NCHW
+ * If @p output is nullptr, this tensor will store the result
+ * of the normalization.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p
+ * input.
+ * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor.
+ * Defaults to 1.0
+ * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor.
+ * Defaults to 0.0
+ * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ */
+ void configure(ITensor *input, ITensor *output, ITensor *gamma = nullptr, ITensor *beta = nullptr,
+ float epsilon = 1e-12f);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEInstanceNormalizationLayerKernelEx.
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported:
+ * NCHW
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p
+ * input.
+ * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults
+ * to 1.0
+ * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor.
+ * Defaults to 0.0
+ * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr,
+ float epsilon = 1e-12f);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ /** Common signature for all the specialized instance normalization functions
+ *
+ * @param[in, out] input An input tensor. If @p output is nullptr, this tensor will
+ * store the result of the normalization.
+ * @param[out] output The output tensor.
+ * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to
+ * 1.0
+ * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to
+ * 0.0
+ * @param[in] epsilon Lower bound value for the normalization. Defaults to 1e-12
+ */
+ using NormalizationFunction = void(ITensor *input, ITensor *output, ITensor *gamma, ITensor *beta,
+ float epsilon, const Window &window);
+
+ NormalizationFunction *_func;
+ ITensor *_input;
+ ITensor *_output;
+ ITensor *_gamma;
+ ITensor *_beta;
+ float _epsilon;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__ */
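The gamma, beta and epsilon parameters above combine in the usual instance-normalization formula, out = gamma * (x - mean) / sqrt(var + epsilon) + beta, evaluated over each H x W channel plane of every instance. A scalar reference of that formula (illustrative only; the NEON kernel's vectorized implementation lives in the corresponding .cpp):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // One H*W channel plane of a single instance, normalized in place.
    static void instance_norm_plane(std::vector<float> &plane, float gamma, float beta,
                                    float epsilon)
    {
      float mean = 0.f;
      for (float v : plane) mean += v;
      mean /= plane.size();

      float var = 0.f;
      for (float v : plane) var += (v - mean) * (v - mean);
      var /= plane.size();

      const float inv_stddev = 1.f / std::sqrt(var + epsilon);
      for (float &v : plane) v = gamma * (v - mean) * inv_stddev + beta;
    }

    int main()
    {
      std::vector<float> plane = {1.f, 2.f, 3.f, 4.f}; // a 2x2 plane
      instance_norm_plane(plane, 1.f, 0.f, 1e-12f);
      for (float v : plane) std::printf("%f ", v);     // roughly -1.34 -0.45 0.45 1.34
      std::printf("\n");
    }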
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h
new file mode 100644
index 000000000..723b14523
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
+#define __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface to multiply scale factor kernel. */
+class NEMultiplyScaleFactorKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NEMultiplyScaleFactorKernel"; }
+ /** Default constructor */
+ NEMultiplyScaleFactorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMultiplyScaleFactorKernel(const NEMultiplyScaleFactorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMultiplyScaleFactorKernel &operator=(const NEMultiplyScaleFactorKernel &) = delete;
+ /** Default Move Constructor. */
+ NEMultiplyScaleFactorKernel(NEMultiplyScaleFactorKernel &&) = default;
+ /** Default move assignment operator */
+ NEMultiplyScaleFactorKernel &operator=(NEMultiplyScaleFactorKernel &&) = default;
+ /** Default destructor */
+ ~NEMultiplyScaleFactorKernel() = default;
+ /** Set input, output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: S32.
+ * @param[in] scale_factor Scale tensor. Data type supported: F16/F32.
+ * @param[out] output Destination tensor. Data type supported: Same as @p scale_factor.
+ * @param[in] multiplier (Optional) Scalar multiplier. Defaults to 1.
+ */
+ void configure(const ITensor *input, const ITensor *scale_factor, ITensor *output,
+ float multiplier = 1.f);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEMultiplyScaleFactorKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: S32.
+ * @param[in] scale_factor Scale tensor info. Data type supported: F16/F32.
+ * @param[in] output Output tensor info. Data types supported: Same as @p scale_factor.
+ * @param[in] multiplier (Optional) Scalar multiplier. Defaults to 1.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *scale_factor,
+ const ITensorInfo *output, float multiplier = 1.f);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ template <typename T> void multiply(const Window &window);
+
+private:
+ const ITensor *_input;
+ const ITensor *_scale_factor;
+ ITensor *_output;
+ float _multiplier;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__ */
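The header pins down only the data types (S32 input, F16/F32 per-row scale factor, optional multiplier). A plausible scalar reading, inferred from the parameter names rather than taken from the kernel, is out = float(input) * scale_factor[row] * multiplier, as sketched below with illustrative names:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Assumed semantics: scale_factor holds one value per row of a [rows x row_len] input.
    static std::vector<float> multiply_scale_factor(const std::vector<int32_t> &input, int row_len,
                                                    const std::vector<float> &scale_factor,
                                                    float multiplier = 1.f)
    {
      std::vector<float> out(input.size());
      for (size_t i = 0; i < input.size(); ++i)
        out[i] = static_cast<float>(input[i]) * scale_factor[i / row_len] * multiplier;
      return out;
    }

    int main()
    {
      const auto out = multiply_scale_factor({10, 20, 30, 40}, 2, {0.5f, 0.25f});
      for (float v : out) std::printf("%g ", v); // prints: 5 10 7.5 10
      std::printf("\n");
    }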
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h
new file mode 100644
index 000000000..79bb78661
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEPRELUKERNEL_H__
+#define __ARM_COMPUTE_NEPRELUKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform Parametric Rectified Linear Unit
+ *
+ * Result is computed by:
+ * @f[ output(x) = \begin{cases} \alpha x, & x < 0 \\ x, & x \geq 0 \end{cases} @f]
+ */
+class NEPReLUKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NEPReLUKernel"; }
+ /** Default constructor */
+ NEPReLUKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPReLUKernel(const NEPReLUKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEPReLUKernel &operator=(const NEPReLUKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEPReLUKernel(NEPReLUKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEPReLUKernel &operator=(NEPReLUKernel &&) = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input Input tensor. Data type supported: QASYMM8/F32
+ * @param[in] alpha Alpha tensor. Data types supported: Same as @p input
+ * @param[out] output Output tensor. Data types supported: Same as @p input
+ */
+ void configure(const ITensor *input, const ITensor *alpha, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEPReLUKernel
+ *
+ * @param[in] input Input tensor input info. Data types supported: QASYMM8/F32.
+ * @param[in] alpha Alpha tensor input info. Data types supported: Same as @p input.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *alpha,
+ const ITensorInfo *output);
+ static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &alpha,
+ const ITensorInfo &output);
+
+private:
+ const ITensor *_input; /**< Source tensor */
+ const ITensor *_alpha; /**< Alpha tensor */
+ ITensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEPRELUKERNEL_H__ */
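The class comment already states the whole computation: f(x) = x for x >= 0 and alpha * x otherwise, with alpha supplied as a tensor. A scalar reference with a per-element alpha (any broadcasting the real kernel may perform is omitted here):

    #include <cstdio>
    #include <vector>

    // Element-wise PReLU with a per-element alpha.
    static std::vector<float> prelu(const std::vector<float> &x, const std::vector<float> &alpha)
    {
      std::vector<float> out(x.size());
      for (size_t i = 0; i < x.size(); ++i)
        out[i] = x[i] >= 0.f ? x[i] : alpha[i] * x[i];
      return out;
    }

    int main()
    {
      for (float v : prelu({-2.f, -1.f, 0.f, 3.f}, {0.1f, 0.5f, 0.5f, 0.5f}))
        std::printf("%g ", v); // prints: -0.2 -0.5 0 3
      std::printf("\n");
    }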
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h
new file mode 100644
index 000000000..590b23873
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
+#define __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the symmetric quantization kernel. */
+class NEQuantizationSymmetricKernel : public INEKernel
+{
+public:
+ const char *name() const override { return "NEQuantizationSymmetricKernel"; }
+ /** Default constructor */
+ NEQuantizationSymmetricKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQuantizationSymmetricKernel(const NEQuantizationSymmetricKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEQuantizationSymmetricKernel &operator=(const NEQuantizationSymmetricKernel &) = delete;
+ /** Default Move Constructor. */
+ NEQuantizationSymmetricKernel(NEQuantizationSymmetricKernel &&) = default;
+ /** Default move assignment operator */
+ NEQuantizationSymmetricKernel &operator=(NEQuantizationSymmetricKernel &&) = default;
+ /** Default destructor */
+ ~NEQuantizationSymmetricKernel() = default;
+ /** Set input, output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[out] output Destination tensor with the same dimensions as @p input. Data type
+ * supported: S8.
+ * @param[out] scale_factor Scale tensor of @p output. Data type supported: Same as @p input.
+ */
+ void configure(const ITensor *input, ITensor *output, ITensor *scale_factor);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEQuantizationSymmetricKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: F16/F32.
+ * @param[in] output Output tensor info. Data types supported: S8.
+ * @param[in] scale_factor Scale tensor of @p output. Data type supported: Same as @p input.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *scale_factor);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ template <typename T> void quantize(const Window &window);
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ ITensor *_scale_factor;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__ */
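The header fixes only the types (F16/F32 in, S8 out, a scale factor written per row). A common symmetric scheme, assumed here rather than taken from the kernel, is scale = max|x| / 127 and q = clamp(round(x / scale), -127, 127) computed row by row:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Assumed per-row symmetric quantization to signed 8 bit.
    static void quantize_symmetric(const std::vector<float> &row, std::vector<int8_t> &q,
                                   float &scale)
    {
      float max_abs = 0.f;
      for (float v : row) max_abs = std::max(max_abs, std::fabs(v));
      scale = (max_abs > 0.f) ? max_abs / 127.f : 1.f;

      q.resize(row.size());
      for (size_t i = 0; i < row.size(); ++i)
      {
        const int v = static_cast<int>(std::lround(row[i] / scale));
        q[i] = static_cast<int8_t>(std::min(127, std::max(-127, v)));
      }
    }

    int main()
    {
      std::vector<int8_t> q;
      float scale = 0.f;
      quantize_symmetric({-1.f, 0.25f, 0.5f}, q, scale);
      std::printf("scale=%g q=%d %d %d\n", scale, q[0], q[1], q[2]); // q = -127 32 64
    }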
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
new file mode 100644
index 000000000..73991b67d
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__
+#define __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a reduction operation */
+class NEReductionOperationKernelEx : public INEKernel
+{
+public:
+ const char *name() const override { return "NEReductionOperationKernelEx"; }
+ /** Default constructor */
+ NEReductionOperationKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReductionOperationKernelEx(const NEReductionOperationKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEReductionOperationKernelEx &operator=(const NEReductionOperationKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ NEReductionOperationKernelEx(NEReductionOperationKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ NEReductionOperationKernelEx &operator=(NEReductionOperationKernelEx &&) = default;
+ /** Default destructor */
+ ~NEReductionOperationKernelEx() = default;
+
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported:
+ * NCHW.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis: 0
+ * @param[in] op Reduction operation to perform.
+ */
+ void configure(const ITensor *input, ITensor *output, unsigned int axis, ReduceOperation op);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEReductionOperationKernelEx.
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts
+ * supported: NCHW.
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p
+ * input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis: 0
+ * @param[in] op Reduction operation to perform.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis,
+ ReduceOperation op);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ const ITensor *_input;
+ ITensor *_output;
+ unsigned int _reduction_axis;
+ ReduceOperation _op;
+ BorderSize _border_size;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__ */
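Because only axis 0 is supported, the kernel effectively collapses the innermost (contiguous) dimension using one of the ReduceOperation modes from TypesEx.h. A scalar reference sketch with a local enum so it stands alone (illustrative only):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    enum class Reduce { MAX, MEAN, SUM, MIN }; // mirrors ReduceOperation in TypesEx.h

    // Reduce a [n x row_len] buffer along its contiguous dimension (ACL's axis 0).
    static std::vector<float> reduce_axis0(const std::vector<float> &in, int n, int row_len,
                                           Reduce op)
    {
      std::vector<float> out(n);
      for (int r = 0; r < n; ++r)
      {
        const float *row = in.data() + r * row_len;
        float acc = (op == Reduce::MAX || op == Reduce::MIN) ? row[0] : 0.f;
        for (int c = (op == Reduce::MAX || op == Reduce::MIN) ? 1 : 0; c < row_len; ++c)
        {
          if (op == Reduce::MAX)      acc = std::max(acc, row[c]);
          else if (op == Reduce::MIN) acc = std::min(acc, row[c]);
          else                        acc += row[c];
        }
        out[r] = (op == Reduce::MEAN) ? acc / row_len : acc;
      }
      return out;
    }

    int main()
    {
      const std::vector<float> in = {1, 2, 3, 4, 5, 6}; // shape [2][3]
      for (float v : reduce_axis0(in, 2, 3, Reduce::MEAN)) std::printf("%g ", v); // prints: 2 5
      std::printf("\n");
    }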
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h
new file mode 100644
index 000000000..5d697c2b2
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
+#define __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the space to depth kernel */
+class NESpaceToDepthLayerKernelEx : public INEKernel
+{
+public:
+ const char *name() const override { return "NESpaceToDepthLayerKernelEx"; }
+ /** Default constructor */
+ NESpaceToDepthLayerKernelEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESpaceToDepthLayerKernelEx(const NESpaceToDepthLayerKernelEx &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESpaceToDepthLayerKernelEx &operator=(const NESpaceToDepthLayerKernelEx &) = delete;
+ /** Allow instances of this class to be moved */
+ NESpaceToDepthLayerKernelEx(NESpaceToDepthLayerKernelEx &&) = default;
+ /** Allow instances of this class to be moved */
+ NESpaceToDepthLayerKernelEx &operator=(NESpaceToDepthLayerKernelEx &&) = default;
+ /** Default destructor */
+ ~NESpaceToDepthLayerKernelEx() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value
+ */
+ void configure(const ITensor *input, ITensor *output, int32_t block_shape);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NESpaceToDepthLayerKernelEx
+ *
+ * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] output Tensor output info. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+ const ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+ int32_t _block_shape; /**< Block shape */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__ */
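At shape level, space-to-depth with block size B moves each B x B spatial patch into the channel dimension, turning an N x C x H x W tensor into N x (C*B*B) x (H/B) x (W/B). A standalone check of that relation (the kernel additionally rearranges the data, which is not modelled here):

    #include <cassert>
    #include <cstdio>

    struct Shape { int n, c, h, w; };

    // Shape-level space-to-depth: spatial dims shrink by block, channels grow by block^2.
    static Shape space_to_depth_shape(Shape in, int block)
    {
      assert(block >= 2 && in.h % block == 0 && in.w % block == 0);
      return {in.n, in.c * block * block, in.h / block, in.w / block};
    }

    int main()
    {
      const Shape out = space_to_depth_shape({1, 3, 4, 6}, 2);
      std::printf("%d x %d x %d x %d\n", out.n, out.c, out.h, out.w); // 1 x 12 x 2 x 3
    }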
diff --git a/compute/ARMComputeEx/arm_compute/core/TypesEx.h b/compute/ARMComputeEx/arm_compute/core/TypesEx.h
new file mode 100644
index 000000000..3b0902f08
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/TypesEx.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_TYPESEX_H__
+#define __ARM_COMPUTE_TYPESEX_H__
+
+namespace arm_compute
+{
+
+/** Available ArgIndex operations */
+enum class ArgOperation
+{
+ MAX,
+ MIN,
+};
+
+/** Available reduce operations */
+enum class ReduceOperation
+{
+ MAX, /**< Max */
+ MEAN, /**< Mean */
+ SUM, /**< Sum */
+ MIN, /**< Min */
+};
+
+/** Available binary logical operations */
+enum class BinaryLogicalOperation
+{
+ AND, /**< AND */
+ OR, /**< OR */
+};
+
+enum class ComparisonOperationEx
+{
+ EQUAL, /**< EQUAL */
+ NOT_EQUAL, /**< NOT_EQUAL */
+};
+
+enum class ElementWiseUnaryEx
+{
+ NEG, /**< NEG */
+};
+
+enum class SubDataType
+{
+ NONE,
+ BOOL,
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TYPESEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/UtilsEx.h b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
new file mode 100644
index 000000000..39026e6bb
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_UTILSEX_H__
+#define __ARM_COMPUTE_UTILSEX_H__
+
+#include <utility>
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+
+/** Returns expected width and height of the transpose convolution's output tensor.
+ *
+ * @note This function was copied in order to fix a bug that computed wrong output dimensions.
+ *
+ * @param[in] in_width Width of input tensor (Number of columns)
+ * @param[in] in_height Height of input tensor (Number of rows)
+ * @param[in] kernel_width Kernel width.
+ * @param[in] kernel_height Kernel height.
+ * @param[in] info Padding and stride info.
+ * @param[in] invalid_right The number of zeros added to the right edge of the output.
+ * @param[in] invalid_top The number of zeros added to the bottom edge of the output.
+ *
+ * @return A pair with the new width in the first position and the new height in the second.
+ */
+const std::pair<unsigned int, unsigned int>
+transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height,
+ unsigned int kernel_width, unsigned int kernel_height,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_top);
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_UTILSEX_H__ */
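For orientation, the conventional transposed-convolution size relation along one dimension is out = stride * (in - 1) + kernel - pad_before - pad_after; the invalid_right / invalid_top trimming that transposeconv_output_dimensions also accounts for is deliberately left out of this reference, since its exact handling lives in the implementation:

    // Conventional transposed-convolution output size along one dimension; the invalid_*
    // adjustment performed by transposeconv_output_dimensions is not modelled here.
    static inline unsigned int tconv_out_size(unsigned int in, unsigned int kernel,
                                              unsigned int stride, unsigned int pad_before,
                                              unsigned int pad_after)
    {
      return stride * (in - 1) + kernel - pad_before - pad_after;
    }
    // Example: in = 4, kernel = 3, stride = 2, no padding -> 2 * 3 + 3 = 9.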
diff --git a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
new file mode 100644
index 000000000..16fd40ed9
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
+#define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Utils.h"
+
+#include "arm_compute/core/utils/helpers/tensor_transform.h"
+
+#include <cmath>
+
+namespace arm_compute
+{
+namespace misc
+{
+namespace shape_calculator
+{
+
+/** Calculate the upsampled output shape used for transpose convolution
+ *
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor shape
+ * @param[in] info Padding and stride info
+ * @param[in] out_dims Output shape dimensions
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[out] pad_left Padding on left
+ * @param[out] pad_right Padding on right
+ * @param[out] pad_top Padding on top
+ * @param[out] pad_bottom Padding on bottom
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_transposeconv_upsampled_shape(
+ const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
+ std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right,
+ unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right,
+ unsigned int &pad_top, unsigned int &pad_bottom)
+{
+ unsigned int sx = info.stride().first;
+ unsigned int sy = info.stride().second;
+ const DataLayout data_layout = input.data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ // Find the upsampled dimensions
+ // transpose conv out:
+ // tconv_out + pad = 1 + (in - 1) * stride + invalid
+ // tconv_out = 1 + (in - 1) * stride + invalid - pad
+ // upsample out:
+ // upsample_out = 1 + (in - 1) * stride
+ unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1;
+ unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1;
+
+ // Find the padding needed for the convolution with stride 1 in order to match output shape
+ // upsample+pad out:
+ // upsample_out + pad = tconv_out + kernel - 1
+ // pad = tconv_out + kernel - 1 - upsample_out
+ unsigned int padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
+ unsigned int pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
+ out_x += padx;
+ out_y += pady;
+
+ unsigned int padx_all_except_invalid = padx + info.pad_left() + info.pad_right() - invalid_right;
+ unsigned int pady_all_except_invalid =
+     pady + info.pad_top() + info.pad_bottom() - invalid_bottom;
+ pad_left = (padx_all_except_invalid + 1) / 2 - info.pad_left();
+ pad_right = padx_all_except_invalid / 2 - info.pad_right() + invalid_right;
+ pad_top = (pady_all_except_invalid + 1) / 2 - info.pad_top();
+ pad_bottom = pady_all_except_invalid / 2 - info.pad_bottom() + invalid_bottom;
+
+ TensorShape scale_out_shape(input.tensor_shape());
+ scale_out_shape.set(idx_w, out_x);
+ scale_out_shape.set(idx_h, out_y);
+
+ return scale_out_shape;
+}
+
+/** Calculate the output shape of the transpose convolution layer
+ *
+ * @param[in] out_dims Output x and y shape dimensions
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor shape
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_transposeconv_output_shape(const std::pair<unsigned int, unsigned int> &out_dims,
+ const ITensorInfo &input, const ITensorInfo &weights)
+{
+ const TensorShape input_shape{input.tensor_shape()};
+ const TensorShape weights_shape{weights.tensor_shape()};
+
+ const DataLayout data_layout = input.data_layout();
+ const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int channel_idx =
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+
+ TensorShape out_shape{input_shape};
+ out_shape.set(width_idx, out_dims.first);
+ out_shape.set(height_idx, out_dims.second);
+ out_shape.set(channel_idx, weights_shape[batch_idx]);
+ return out_shape;
+}
+
+/** Calculate the depth to space output shape of a tensor
+ *
+ * @param[in] input Input tensor info
+ * @param[in] block Block shape value
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_depth_to_space_shape_ex(const ITensorInfo *input, int block)
+{
+ ARM_COMPUTE_ERROR_ON(block < 2);
+
+ const DataLayout data_layout = input->data_layout();
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_channel =
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+ TensorShape output_shape{input->tensor_shape()};
+ output_shape.set(idx_width, input->dimension(idx_width) * block);
+ output_shape.set(idx_height, input->dimension(idx_height) * block);
+ output_shape.set(idx_channel, input->dimension(idx_channel) / (block * block));
+
+ return output_shape;
+}
+
+/** Calculate the space to depth output shape of a tensor
+ *
+ * @param[in] input Input tensor info
+ * @param[in] block_shape Block shape value
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_space_to_depth_shape_ex(const ITensorInfo *input, int32_t block_shape)
+{
+ ARM_COMPUTE_ERROR_ON(block_shape < 2);
+ TensorShape output_shape{input->tensor_shape()};
+
+ const DataLayout data_layout = input->data_layout();
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+ output_shape.set(idx_width, input->tensor_shape()[idx_width] / block_shape);
+ output_shape.set(idx_height, input->tensor_shape()[idx_height] / block_shape);
+ output_shape.set(idx_depth, input->tensor_shape()[idx_depth] * (block_shape * block_shape));
+
+ return output_shape;
+}
+
+/** Calculate the gather output shape of a tensor
+ *
+ * @param[in] input_shape Input tensor shape
+ * @param[in] indices_shape Indices tensor shape
+ * @param[in] actual_axis The axis to be gathered
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_gather_shape_ex(const TensorShape &input_shape,
+ const TensorShape &indices_shape, uint32_t actual_axis)
+{
+ ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 3);
+ ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4);
+ ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() + indices_shape.num_dimensions() - 1 > 4);
+ ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions());
+
+ TensorShape output_shape = input_shape;
+ if (indices_shape.num_dimensions() == 1)
+ {
+ output_shape[actual_axis] = indices_shape[0];
+ }
+ else if (indices_shape.num_dimensions() > 1)
+ {
+ output_shape.shift_right(indices_shape.num_dimensions() - 1);
+
+ for (uint32_t i = 0, o = 0; o < output_shape.num_dimensions(); ++o, ++i)
+ {
+ if (o == actual_axis)
+ {
+ ++i;
+ for (uint32_t in = 0; in < indices_shape.num_dimensions(); ++in, ++o)
+ {
+ output_shape[o] = indices_shape[in];
+ }
+ }
+ else
+ {
+ output_shape[o] = input_shape[i];
+ }
+ }
+ }
+ return output_shape;
+}
+
+} // namespace shape_calculator
+} // namespace misc
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__
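A quick usage check of compute_gather_shape_ex with rank-1 indices, assuming the ARM Compute Library headers and this ARMComputeEx include directory are on the include path (TensorShape lists dimensions innermost-first):

    #include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
    #include <cstdio>

    int main()
    {
      using namespace arm_compute;
      const TensorShape input(5U, 4U); // dims: [0] = 5, [1] = 4
      const TensorShape indices(3U);   // rank-1 indices
      const TensorShape out = misc::shape_calculator::compute_gather_shape_ex(input, indices, 1);
      // Gathering 3 indices along axis 1 replaces that dimension: prints "5 x 3".
      std::printf("%zu x %zu\n", static_cast<size_t>(out[0]), static_cast<size_t>(out[1]));
    }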