diff options
Diffstat (limited to 'compute/ARMComputeEx/arm_compute/core')
34 files changed, 3738 insertions, 0 deletions
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h new file mode 100644 index 000000000..e4e752ef9 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLKernelLibraryEx.h + * @ingroup COM_AI_RUNTIME + * @brief This file is a cloned version of CLKernelLibrary.h in ACL. This file defines + * an interface for CLKernelLibrary.cpp which adds more OpenCL kernels on top of ACL. + */ + +#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ +#define __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ + +#include "arm_compute/core/CL/OpenCL.h" + +#include <map> +#include <set> +#include <string> +#include <utility> + +namespace arm_compute +{ + +/** + * @brief Class to build OpenCL kernels added from nnfw + * */ +class CLKernelLibraryEx +{ + using StringSet = std::set<std::string>; + +private: + /** + * @brief Construct a new CLKernelLibraryEx object + */ + CLKernelLibraryEx(); + +public: + /** + * @brief Prevent instances of this class from being copied. + */ + CLKernelLibraryEx(const CLKernelLibraryEx &) = delete; + + /** + * @brief Prevent instances of this class from being copied. 
+ */ + const CLKernelLibraryEx &operator=(const CLKernelLibraryEx &) = delete; + + /** + * @brief Get the KernelLibrary singleton. + * @return The KernelLibrary instance + */ + static CLKernelLibraryEx &get(); + + /** + * @brief Initialise the kernel library. + * @param[in] kernel_path Path of the directory from which kernel sources are loaded. + * @param[in] context CL context used to create programs. + * @param[in] device CL device for which the programs are created. + * @return N/A + */ + void init(std::string kernel_path, cl::Context context, cl::Device device) + { + _kernel_path = std::move(kernel_path); + _context = std::move(context); + _device = std::move(device); + } + + /** + * @brief Set the path that the kernels reside in. + * @param[in] kernel_path Path of the directory from which kernel sources are loaded. + * @return N/A + */ + void set_kernel_path(const std::string &kernel_path) { _kernel_path = kernel_path; }; + + /** + * @brief Get the path that the kernels reside in. + * @return the path of kernel files + */ + std::string get_kernel_path() { return _kernel_path; }; + + /** + * @brief Get the source of the selected program. + * @param[in] program_name Program name. + * @return Source of the selected program. + */ + std::string get_program_source(const std::string &program_name); + + /** + * @brief Set the CL context used to create programs. + * @note Setting the context also resets the device to the + * first one available in the new context. + * @param[in] context A CL context. + * @return N/A + */ + void set_context(cl::Context context) + { + _context = std::move(context); + if (_context.get() == nullptr) + { + _device = cl::Device(); + } + else + { + const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>(); + + if (cl_devices.empty()) + { + _device = cl::Device(); + } + else + { + _device = cl_devices[0]; + } + } + } + + /** + * @brief Return associated CL context. + * @return A CL context. 
+ */ + cl::Context &context() { return _context; } + + /** + * @brief Set the CL device for which the programs are created. + * @param[in] device A CL device. + * @return N/A + */ + void set_device(cl::Device device) { _device = std::move(device); } + + /** + * @brief Gets the CL device for which the programs are created. + * @return A CL device. + */ + cl::Device &get_device() { return _device; } + + /** + * @brief Return the device version + * @return The content of CL_DEVICE_VERSION + */ + std::string get_device_version(); + + /** + * @brief Create a kernel from the kernel library. + * @param[in] kernel_name Kernel name. + * @param[in] build_options_set Kernel build options as a set. + * @return The created kernel. + */ + Kernel create_kernel(const std::string &kernel_name, + const StringSet &build_options_set = {}) const; + + /** + * @brief Find the maximum number of local work items in a workgroup can be supported for the + * kernel. + * @param[in] kernel kernel object + */ + + size_t max_local_workgroup_size(const cl::Kernel &kernel) const; + /** + * @brief Return the default NDRange for the device. + * @return default NDRangeof the device + */ + cl::NDRange default_ndrange() const; + + /** + * @brief Clear the library's cache of binary programs + * @return N/A + */ + void clear_programs_cache() + { + _programs_map.clear(); + _built_programs_map.clear(); + } + + /** + * @brief Access the cache of built OpenCL programs + * @return program map data structure of which key is name of kernel and value is + * kerel source name. 
(*.cl) + */ + const std::map<std::string, cl::Program> &get_built_programs() const + { + return _built_programs_map; + } + + /** + * @brief Add a new built program to the cache + * @param[in] built_program_name Name of the program + * @param[in] program Built program to add to the cache + * @return N/A + */ + void add_built_program(const std::string &built_program_name, cl::Program program); + + /** + * @brief Returns true if FP16 is supported by the CL device + * @return true if the CL device supports FP16 + */ + bool fp16_supported() const; + + /** + * @brief Returns true if int64_base_atomics extension is supported by the CL device + * @return true if the CL device supports int64_base_atomics extension + */ + bool int64_base_atomics_supported() const; + +private: + /** + * @brief Load program and its dependencies. + * @param[in] program_name Name of the program to load. + */ + const Program &load_program(const std::string &program_name) const; + /** + * @brief Concatenates contents of a set into a single string. + * @param[in] s Input set to concatenate. + * @return Concatenated string. + */ + std::string stringify_set(const StringSet &s) const; + + cl::Context _context; /**< Underlying CL context. */ + cl::Device _device; /**< Underlying CL device. */ + std::string _kernel_path; /**< Path to the kernels folder. */ + mutable std::map<std::string, const Program> + _programs_map; /**< Map with all already loaded program data. */ + mutable std::map<std::string, cl::Program> + _built_programs_map; /**< Map with all already built program data. */ + static const std::map<std::string, std::string> + _kernel_program_map; /**< Map that associates kernel names with programs. */ + static const std::map<std::string, std::string> + _program_source_map; /**< Contains sources for all programs. + Used for compile-time kernel inclusion. 
>*/ +}; +} +#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h new file mode 100644 index 000000000..b98b174f7 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLArgOperationKernel.h + * @brief This file defines CLArgOperationKernel + * @ingroup COM_AI_RUNTIME + */ + +#ifndef __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ +#define __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to define interface for the argop kernel. + */ +class CLArgOperationKernel : public ICLKernel +{ +public: + /** + * @brief Default constructor. + */ + CLArgOperationKernel(); + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied + */ + CLArgOperationKernel(const CLArgOperationKernel &) = delete; + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). 
+ * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied + * @return Reference of this instance + */ + CLArgOperationKernel &operator=(const CLArgOperationKernel &) = delete; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved + */ + CLArgOperationKernel(CLArgOperationKernel &&) = default; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved + * @return Reference of this instance + */ + CLArgOperationKernel &operator=(CLArgOperationKernel &&) = default; + /** + * @brief Initialise the kernel's input, output and border mode. + * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. + * @param[out] output The output tensor, Data types supported: S32. + * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. + * @param[in] op Arg operation to perform. + * return N/A + */ + void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, ArgOperation op); + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLArgOperationKernel + * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32. + * @param[in] output The output tensor info, Data types supported: S32. + * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. + * @param[in] op Arg operation to perform. 
+ * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis, + ArgOperation op); + + /* + * @brief Run CLArgOperationKernel op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + uint32_t _axis; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h new file mode 100644 index 000000000..ab33d9d3a --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ +#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to return truth values of two input tensors for Binary Logical Op*/ +class CLBinaryLogicalOpKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBinaryLogicalOpKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBinaryLogicalOpKernel(const CLBinaryLogicalOpKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBinaryLogicalOpKernel &operator=(const CLBinaryLogicalOpKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBinaryLogicalOpKernel(CLBinaryLogicalOpKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBinaryLogicalOpKernel &operator=(CLBinaryLogicalOpKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input1 Source tensor1. + * @param[in] input2 Source tensor2. + * @param[out] output Output tensor. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, + BinaryLogicalOperation op); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + + BorderSize border_size() const override; + +private: + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; +}; + +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h new file mode 100644 index 000000000..16cef0b61 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLCastKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLCastKernel class + */ + +#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__ +#define __ARM_COMPUTE_CLCASTKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to define OpenCL kernel for cast operation + */ +class CLCastKernel : public ICLKernel +{ +public: + /** + * @brief Construct CLCastKernel object + */ + CLCastKernel(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ + CLCastKernel(const CLCastKernel &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ + CLCastKernel &operator=(const CLCastKernel &) = delete; + + /** + * @brief Construct CLCastKernel object using default move constructor + * @param[in] CLCastKernel object to move + */ + CLCastKernel(CLCastKernel &&) = default; + + /** + * @brief Allow instances of this class to be moved + * @param[in] CLCastKernel object to move + */ + CLCastKernel &operator=(CLCastKernel &&) = default; + + /** + * @brief Destruct this CLCastKernel object + */ + ~CLCastKernel() = default; + + /** + * @brief Initialise the kernel's input and output. + * @param[in] input Input tensor. 
Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[in] input_subtype Sub data type of input. + * @return N/A + */ + void configure(const ICLTensor *input, ICLTensor *output, SubDataType input_subtype); + + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h new file mode 100644 index 000000000..60ec7a82a --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform depthTospace operation */ +class CLDepthToSpaceKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthToSpaceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default; + /** Default destructor */ + ~CLDepthToSpaceKernel() = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor */ + ICLTensor *_output; /**< Destination tensor */ +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h new file mode 100644 index 000000000..da075db69 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file CLEmbeddingLookupKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLEmbeddingLookupKernel class + */ + +#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ +#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** +* @brief Class to perform EmbeddingLookup operation with opencl kernel +*/ +class CLEmbeddingLookupKernel : public ICLKernel +{ +public: + /** + * @brief Construct a CLEmbeddingLookupKernel object + * */ + CLEmbeddingLookupKernel(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLEmbeddingLookupKernel(const CLEmbeddingLookupKernel &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLEmbeddingLookupKernel &operator=(const CLEmbeddingLookupKernel &) = delete; + + /** + * @brief Construct a CLEmbeddingLookupKernel object by using default move constructor + * @param[in] CLEmbeddingLookupKernel object to move + * */ + CLEmbeddingLookupKernel(CLEmbeddingLookupKernel &&) = default; + + /** + * @brief Move assignment operator + * @param[in] CLEmbeddingLookupKernel object to move + * */ + CLEmbeddingLookupKernel &operator=(CLEmbeddingLookupKernel &&) = default; + + /** + * @brief Destruct this object + * */ + ~CLEmbeddingLookupKernel() = default; + + /** + * @brief Set the input and output of the kernel + * @param[in] input Source tensor. + * Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] lookups Lookups are 1D tensor that values are indices into the first + * dimension of input. + * Data types supported: S32. 
+ * @return N/A + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLEmbeddingLookupKernel + * @param[in] input The input tensor info. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] output The output tensor info, Data types supported: same as @p input1. + * @param[in] lookups Lookups info. Data types supported: S32. + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *lookups); + + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /** Source tensor */ + ICLTensor *_output; /** Destination tensor */ + const ICLTensor *_lookups; /** Lookups tensor */ +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h new file mode 100644 index 000000000..aa81a1efa --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLGatherExKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLGatherExKernel class + */ + +#ifndef __ARM_COMPUTE_CLGATHEREXKERNEL_H__ +#define __ARM_COMPUTE_CLGATHEREXKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to define an interface for the gather kernel. + */ +class CLGatherExKernel : public ICLKernel +{ +public: + /** + * @brief Construct CLGatherExKernel object + * */ + CLGatherExKernel(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + */ + CLGatherExKernel(const CLGatherExKernel &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + */ + CLGatherExKernel &operator=(const CLGatherExKernel &) = delete; + + /** + * @brief Construct CLGatherExKernel object by using default move constructor + * @param[in] CLGatherExKernel object to move + */ + CLGatherExKernel(CLGatherExKernel &&) = default; + + /** + * @brief Move assignment operator + * @param[in] CLGatherExKernel object to move + */ + CLGatherExKernel &operator=(CLGatherExKernel &&) = default; + + /** + * @brief Initialise the kernel's input, output and border mode. + * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. + * @param[in] indices Indices tensor. Data types supported: S32. + * @param[out] output The output tensor, Data types supported: same as @p input1. 
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative + * values wrap around. Defaults to 0 + * @return N/A + */ + void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLGatherExKernel + * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. + * @param[in] indices Indices tensor. Data types supported: S32. + * @param[out] output The output tensor, Data types supported: same as @p input1. + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative + * values wrap around. Defaults to 0 + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, + const ITensorInfo *output, int axis = 0); + + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. + * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_indices; + ICLTensor *_output; + int _axis; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLGATHEREXKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h new file mode 100644 index 000000000..8269e5a7a --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLHashtableLookupKernel.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines CLHashtableLookupKernel class + */ + +#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ +#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +class ICLTensor; + +/** +* @brief Class to perform HashtableLookup operation with opencl kernel +*/ +class CLHashtableLookupKernel : public ICLKernel +{ +public: + /** + * @brief Construct a CLHashtableLookupKernel object + * */ + CLHashtableLookupKernel(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLHashtableLookupKernel(const CLHashtableLookupKernel &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + * */ + CLHashtableLookupKernel &operator=(const CLHashtableLookupKernel &) = delete; + + /** + * @brief Construct a CLHashtableLookupKernel object by using default move constructor + * @param[in] CLHashtableLookupKernel object to move + * */ + CLHashtableLookupKernel(CLHashtableLookupKernel &&) = default; + + /** + * @brief Move assignment operator + * @param[in] CLHashtableLookupKernel object to move + * */ + CLHashtableLookupKernel 
&operator=(CLHashtableLookupKernel &&) = default; + + /** + * @brief Destruct this object + * */ + ~CLHashtableLookupKernel() = default; + + /** + * @brief Set the input and output of the kernel + * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of + * input. + * @param[in] keys Keys 1D tensor. keys and input pair represent a map. + * Data types supported: S32 + * @param[in] input Source tensor. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p + * input. + * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits + * (True) or not (False). Data types supported: U8/QASYMM8 + * @return N/A + */ + void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input, + ICLTensor *output, ICLTensor *hits); + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLHashtableLookupKernel + * @param[in] lookups The lookups tensor info. Data types supported: S32. + * @param[in] keys The keys tensor info. keys and input pair represent a map. + * Data types supported: S32 + * @param[in] input The input tensor info. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output The output tensor. Data types and data layouts supported: Same as @p + * input. + * @param[out] hits The hits tensor info. A boolean tensor that indicates whether the lookup + * hits + * (True) or not (False). Data types supported: U8/QASYMM8 + * @return a status + */ + static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys, + const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *hits); + + /** + * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command + * queue. 
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have + * been executed by the time this method returns. + * @param[in] window Region on which to execute the kernel. (Must be a valid region of + * the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_lookups{nullptr}; /** Lookups tensor */ + const ICLTensor *_keys{nullptr}; /** Keys tensor */ + const ICLTensor *_input{nullptr}; /** Source tensor */ + ICLTensor *_output{nullptr}; /** Destination tensor */ + ICLTensor *_hits{nullptr}; /** Hits tensor */ + std::unique_ptr<CLTensor> _lookup_indices{nullptr}; /** Lookup indices tensor */ +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h new file mode 100644 index 000000000..f5e147e03 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2019 ARM Limited. 
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for performing an instance normalization */
class CLInstanceNormalizationLayerKernelEx : public ICLKernel
{
public:
  /** Constructor */
  CLInstanceNormalizationLayerKernelEx();
  /** Prevent instances of this class from being copied (As this class contains pointers) */
  CLInstanceNormalizationLayerKernelEx(const CLInstanceNormalizationLayerKernelEx &) = delete;
  /** Prevent instances of this class from being copied (As this class contains pointers) */
  CLInstanceNormalizationLayerKernelEx &
  operator=(const CLInstanceNormalizationLayerKernelEx &) = delete;
  /** Default Move Constructor. */
  CLInstanceNormalizationLayerKernelEx(CLInstanceNormalizationLayerKernelEx &&) = default;
  /** Default move assignment operator */
  CLInstanceNormalizationLayerKernelEx &
  operator=(CLInstanceNormalizationLayerKernelEx &&) = default;
  /** Default destructor */
  ~CLInstanceNormalizationLayerKernelEx() = default;

  /** Set the input and output tensors.
   *
   * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported:
   *                       NCHW
   *                       NOTE(review): validate() below also lists NHWC — confirm which
   *                       layouts configure() actually accepts.
   * @param[out] output Destination tensor. Data types and data layouts supported: same as @p
   *                    input.
   * @param[in]  gamma   (Optional) The scale tensor applied to the normalized tensor. Defaults
   *                     to nullptr
   * @param[in]  beta    (Optional) The offset tensor applied to the normalized tensor. Defaults
   *                     to nullptr
   * @param[in]  epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
   */
  void configure(ICLTensor *input, ICLTensor *output, ICLTensor *gamma = nullptr,
                 ICLTensor *beta = nullptr, float epsilon = 1e-12f);

  /** Static function to check if given info will lead to a valid configuration of @ref
   * CLInstanceNormalizationLayerEx.
   *
   * @param[in] input Source tensor info. In case of @p output tensor = nullptr this tensor will
   *                  store the result of the normalization.
   *                  Data types supported: F16/F32. Data layout supported: NHWC, NCHW
   * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p
   *                   input.
   * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults to
   *                  nullptr
   * @param[in] beta  (Optional) The offset tensor applied to the normalized tensor. Defaults to
   *                  nullptr
   * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
   *
   * @return a status
   */
  static Status validate(const ITensorInfo *input, const ITensorInfo *output,
                         const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr,
                         float epsilon = 1e-12f);

  // Inherited methods overridden:
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  ICLTensor *_input;  /**< Source tensor (may also receive the result, see _run_in_place) */
  ICLTensor *_output; /**< Destination tensor */
  ICLTensor *_gamma;  /**< Optional scale tensor */
  ICLTensor *_beta;   /**< Optional offset tensor */
  float _epsilon;     /**< Lower bound value for the normalization */
  bool _run_in_place; /**< presumably true when output == nullptr / output == input — confirm in .cpp */
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
new file mode 100644
index 000000000..ccbea147e
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
@@ -0,0 +1,55 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__
#define __ARM_COMPUTE_CLNEGKERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** OpenCL kernel to perform a negation operation on tensor*/
class CLNegKernel : public ICLKernel
{
public:
  /** Default constructor */
  CLNegKernel();
  /** Prevent instances of this class from being copied (As this class contains pointers). */
  CLNegKernel(const CLNegKernel &) = delete;
  /** Prevent instances of this class from being copied (As this class contains pointers). */
  CLNegKernel &operator=(const CLNegKernel &) = delete;
  /** Allow instances of this class to be moved */
  CLNegKernel(CLNegKernel &&) = default;
  /** Allow instances of this class to be moved */
  CLNegKernel &operator=(CLNegKernel &&) = default;
  /** Initialize the kernel's input, output.
   *
   * @param[in]  input  Source tensor.
   * @param[out] output Destination tensor.
   */
  void configure(const ICLTensor *input, ICLTensor *output);

  // Inherited methods overridden:
  /** Enqueue the kernel on @p queue for the region @p window. */
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  const ICLTensor *_input; /**< Source tensor */
  ICLTensor *_output;      /**< Destination tensor */
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLNEGKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
new file mode 100644
index 000000000..eff1b8bd5
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
@@ -0,0 +1,59 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__
#define __ARM_COMPUTE_CLPRELU_KERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** OpenCL kernel to calculate PReLU*/
class CLPReLUKernel : public ICLKernel
{
public:
  /** Default constructor */
  CLPReLUKernel();
  /** Prevent instances of this class from being copied (As this class contains pointers). */
  CLPReLUKernel(const CLPReLUKernel &) = delete;
  /** Prevent instances of this class from being copied (As this class contains pointers). */
  CLPReLUKernel &operator=(const CLPReLUKernel &) = delete;
  /** Allow instances of this class to be moved */
  CLPReLUKernel(CLPReLUKernel &&) = default;
  /** Allow instances of this class to be moved */
  CLPReLUKernel &operator=(CLPReLUKernel &&) = default;
  /** Initialize the kernel's input, output.
   *
   * @param[in]  input  Source tensor1.
   * @param[in]  alpha  Source tensor2 (the PReLU slope parameter).
   * @param[out] output Output tensor.
   */
  void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output);

  // Inherited methods overridden:
  /** Enqueue the kernel on @p queue for the region @p window. */
  void run(const Window &window, cl::CommandQueue &queue) override;

  /** Border size used by this kernel. */
  BorderSize border_size() const override;

private:
  const ICLTensor *_input; /**< Source tensor */
  const ICLTensor *_alpha; /**< Slope tensor */
  ICLTensor *_output;      /**< Destination tensor */
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
new file mode 100644
index 000000000..a26a4a7fc
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
@@ -0,0 +1,104 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file CLReduceOperationKernel.h
 * @brief This file defines CLReduceOperationKernel class
 * @ingroup COM_AI_RUNTIME
 */

#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/TypesEx.h"

namespace arm_compute
{
class ICLTensor;

/**
 * @brief Class to define interface for the reduce operation kernel
 */
class CLReduceOperationKernel : public ICLKernel
{
public:
  /**
   * @brief Default constructor
   */
  CLReduceOperationKernel();
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers)
   */
  CLReduceOperationKernel(const CLReduceOperationKernel &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers)
   */
  CLReduceOperationKernel &operator=(const CLReduceOperationKernel &) = delete;
  /**
   * @brief Allow instances of this class to be moved
   */
  CLReduceOperationKernel(CLReduceOperationKernel &&) = default;
  /**
   * @brief Allow instances of this class to be moved
   */
  CLReduceOperationKernel &operator=(CLReduceOperationKernel &&) = default;
  /**
   * @brief Default destructor
   */
  ~CLReduceOperationKernel() = default;

  /**
   * @brief Set the input and output tensors.
   * @param[in]  input  Source tensor. Data types supported: U8/S32/F32.
   * @param[out] output Destination tensor. Data types supported: Same as @p input.
   *                    Output will have the same number of dimensions as input.
   * @param[in]  axis   Axis along which to reduce.
   * @param[in]  op     Reduce operation to perform.
   * @return N/A
   */
  void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis,
                 ReduceOperation op);

  /**
   * @brief Static function to check if given info will lead to a valid configuration of @ref
   *        CLReduceOperationKernel.
   * @param[in] input  Source tensor info. Data types supported: U8/S32/F32.
   * @param[in] output Destination tensor info. Data types supported: Same as @p input.
   *                   Output will have the same number of dimensions as input.
   * @param[in] axis   Axis along which to reduce.
   * @param[in] op     Reduce operation to perform.
   * @return a status
   */
  static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
                         ReduceOperation op);

  /**
   * @brief Run CLReduceOperationKernel op
   * @param[in] window Window to be used for in_slice
   * @param[in] queue  CLQueue
   * @return N/A
   */
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  const ICLTensor *_input; /**< Source tensor */
  ICLTensor *_output;      /**< Destination tensor */
  uint32_t _axis;          /**< Axis along which to reduce */
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
new file mode 100644
index 000000000..577e38cc4
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
@@ -0,0 +1,69 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** OpenCL kernel to perform SPACE_TO_BATCH_ND operation */
class CLSpaceToBatchNDKernel final : public ICLKernel
{
public:
  /** Default constructor */
  CLSpaceToBatchNDKernel();
  /** Prevent instances of this class from being copied (As this class contains pointers) */
  CLSpaceToBatchNDKernel(const CLSpaceToBatchNDKernel &) = delete;
  /** Prevent instances of this class from being copied (As this class contains pointers) */
  CLSpaceToBatchNDKernel &operator=(const CLSpaceToBatchNDKernel &) = delete;
  /** Allow instances of this class to be moved */
  CLSpaceToBatchNDKernel(CLSpaceToBatchNDKernel &&) = default;
  /** Allow instances of this class to be moved */
  CLSpaceToBatchNDKernel &operator=(CLSpaceToBatchNDKernel &&) = default;
  /** Default destructor */
  ~CLSpaceToBatchNDKernel() = default;
  /** Initialise the kernel's input and output.
   *
   * @note The data layout of input and output must be the same.
   * @note The number of dimensions of input and output must be 4, and `spatial` dimensions
   *       are height and width.
   * @param[in]  input        Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
   *                          Data layout supported: NCHW/NHWC
   * @param[in]  block_size   Block size tensor. Data types supported: S32.
   * @param[in]  padding_size Padding size tensor. Data types supported: S32.
   * @param[out] output       Output tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
   *                          Data layout supported: NCHW/NHWC
   */
  void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size,
                 ICLTensor *output);

  // Inherited methods overridden:
  /** Enqueue the kernel on @p queue for the region @p window. */
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  const ICLTensor *_input{nullptr};        /**< Source tensor */
  const ICLTensor *_block_size{nullptr};   /**< Block size tensor */
  const ICLTensor *_padding_size{nullptr}; /**< Padding size tensor */
  ICLTensor *_output{nullptr};             /**< Destination tensor */
};

} // namespace arm_compute

#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
new file mode 100644
index 000000000..be845a549
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
@@ -0,0 +1,58 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** OpenCL kernel to perform spaceTodepth operation */
class CLSpaceToDepthKernel : public ICLKernel
{
public:
  /** Default constructor */
  CLSpaceToDepthKernel();
  /** Prevent instances of this class from being copied (As this class contains pointers) */
  CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete;
  /** Prevent instances of this class from being copied (As this class contains pointers) */
  CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete;
  /** Allow instances of this class to be moved */
  CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default;
  /** Allow instances of this class to be moved */
  CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default;
  /** Default destructor */
  ~CLSpaceToDepthKernel() = default;
  /** Initialise the kernel's input and output.
   *
   * @param[in]  input      Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
   * @param[out] output     Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
   * @param[in]  block_size Block size used for the space-to-depth rearrangement.
   */
  void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);

  // Inherited methods overridden:
  /** Enqueue the kernel on @p queue for the region @p window. */
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  const ICLTensor *_input; /**< Source tensor */
  ICLTensor *_output;      /**< Destination tensor */
};

} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
new file mode 100644
index 000000000..8da2daecc
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
@@ -0,0 +1,657 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file CLTopKV2Kernel.h
 * @brief This file defines classes for TopKV2Kernel
 * @ingroup COM_AI_RUNTIME
 */

#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__
#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

// these parameters can be changed
#define _ITEMS 16                          // number of items in a group
#define _GROUPS 4                          // the number of virtual processors is _ITEMS * _GROUPS
#define _HISTOSPLIT (_ITEMS * _GROUPS / 2) // number of splits of the histogram
#define PERMUT                             // store the final permutation
////////////////////////////////////////////////////////

// Disable GPU implementation
// TODO Enable GPU implementation with verification, or remove code
// Invalid result on GPU
#if 0
namespace arm_compute
{
class ICLTensor;

/**
 * @brief Class to define CLTopKV2Single
 */
class CLTopKV2Single : public ICLKernel
{
public:
  /**
   * @brief Constructor
   */
  CLTopKV2Single();
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied
   */
  CLTopKV2Single(const CLTopKV2Single &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied
   * @return Reference of this instance
   */
  CLTopKV2Single &operator=(const CLTopKV2Single &) = delete;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved
   */
  CLTopKV2Single(CLTopKV2Single &&) = default;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved
   * @return Reference of this instance
   */
  CLTopKV2Single &operator=(CLTopKV2Single &&) = default;

  /**
   * @brief Initialise kernel with params
   * @param[in] input An input tensor
   * @param[in] topk_values Values of the top k predictions
   * @param[in] topk_indices Indices of the top k predictions
   * @param[in] indices Indices
   * @param[in] temp_stack Temp stack
   * @param[in] k K of the top k predictions
   * @param[in] n Number times to quick-sort
   * @return N/A
   */
  void configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices,
                 cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n);

  /**
   * @brief Run CLTopKV2Single op
   * @param[in] window Window to be used for in_slice
   * @param[in] queue cl::CommandQueue
   * @return N/A
   */
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  ICLTensor *_input;        /**< Input tensor */
  ICLTensor *_topk_values;  /**< Top-k values tensor */
  ICLTensor *_topk_indices; /**< Top-k indices tensor */
};

/**
 * @brief Class to define CLTopKV2Init
 */
class CLTopKV2Init : public ICLKernel
{
public:
  /**
   * @brief Constructor
   */
  CLTopKV2Init();
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied
   */
  CLTopKV2Init(const CLTopKV2Init &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied
   * @return Reference of this instance
   */
  CLTopKV2Init &operator=(const CLTopKV2Init &) = delete;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved
   */
  CLTopKV2Init(CLTopKV2Init &&) = default;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved
   * @return Reference of this instance
   */
  CLTopKV2Init &operator=(CLTopKV2Init &&) = default;

  /**
   * @brief Initialise kernel with params
   * @param[in] input An input tensor
   * @param[in] in_key_buf Buffer of input key
   * @param[in] in_ind_buf Buffer of input index
   * @param[in] n Number times to quick-sort
   * @return N/A
   */
  void configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, int n);

  /**
   * @brief Run CLTopKV2Init op
   * @param[in] window Window to be used for in_slice
   * @param[in] queue cl::CommandQueue
   * @return N/A
   */
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  ICLTensor *_input; /**< Input tensor */
};

/**
 * @brief Class to define CLRadixSortHistogram
 */
class CLRadixSortHistogram : public ICLKernel
{
public:
  /**
   * @brief Constructor
   */
  CLRadixSortHistogram();
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied
   */
  CLRadixSortHistogram(const CLRadixSortHistogram &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied
   * @return Reference of this instance
   */
  CLRadixSortHistogram &operator=(const CLRadixSortHistogram &) = delete;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved
   */
  CLRadixSortHistogram(CLRadixSortHistogram &&) = default;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved
   * @return Reference of this instance
   */
  CLRadixSortHistogram &operator=(CLRadixSortHistogram &&) = default;

  /**
   * @brief Initialise kernel with params
   * @param[out] hist_buf Buffer of histogram
   * @param[in] bits Number of bits to be used for radix sort
   * @param[in] n Integer number size to sort
   * @return N/A
   */
  void configure(cl::Buffer *hist_buf, int bits, int n);

  /**
   * @brief Set pass
   * @param[in] pass Passes made of in radix sort algorithm
   * @param[in] in_key_buf Buffer of input key
   * @return N/A
   */
  void setPass(int pass, cl::Buffer *in_key_buf)
  {
    _pass = pass;
    _in_key_buf = in_key_buf;
  }

  /**
   * @brief Run CLRadixSortHistogram op
   * @param[in] window Window to be used for in_slice
   * @param[in] queue cl::CommandQueue
   * @return N/A
   */
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  int _pass;               /**< Current radix-sort pass */
  cl::Buffer *_in_key_buf; /**< Buffer of input key */
};

/**
 * @brief Class to define CLRadixSortScanHistogram
 */
class CLRadixSortScanHistogram : public ICLKernel
{
public:
  /**
   * @brief Constructor
   */
  CLRadixSortScanHistogram();
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied
   */
  CLRadixSortScanHistogram(const CLRadixSortScanHistogram &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied
   * @return Reference of this instance
   */
  CLRadixSortScanHistogram &operator=(const CLRadixSortScanHistogram &) = delete;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved
   */
  CLRadixSortScanHistogram(CLRadixSortScanHistogram &&) = default;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved
   * @return Reference of this instance
   */
  CLRadixSortScanHistogram &operator=(CLRadixSortScanHistogram &&) = default;

  /**
   * @brief Initialise kernel with params
   * @param[out] hist_buf Buffer of histogram
   * @param[out] glob_sum_buf Buffer of global sum
   * @param[in] bits Number of bits to be used for radix sort
   * @return N/A
   */
  void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);

  /**
   * @brief Run CLRadixSortScanHistogram op
   * @param[in] window Window to be used for in_slice
   * @param[in] queue cl::CommandQueue
   * @return N/A
   */
  void run(const Window &window, cl::CommandQueue &queue) override;
};

/**
 * @brief Class to define CLRadixSortGlobalScanHistogram
 */
class CLRadixSortGlobalScanHistogram : public ICLKernel
{
public:
  /**
   * @brief Constructor
   */
  CLRadixSortGlobalScanHistogram();
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied
   */
  CLRadixSortGlobalScanHistogram(const CLRadixSortGlobalScanHistogram &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied
   * @return Reference of this instance
   */
  CLRadixSortGlobalScanHistogram &operator=(const CLRadixSortGlobalScanHistogram &) = delete;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved
   */
  CLRadixSortGlobalScanHistogram(CLRadixSortGlobalScanHistogram &&) = default;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved
   * @return Reference of this instance
   */
  CLRadixSortGlobalScanHistogram &operator=(CLRadixSortGlobalScanHistogram &&) = default;

  /**
   * @brief Initialise kernel with params
   * @param[out] glob_sum_buf Buffer of global sum
   * @param[out] temp_buf Temp buffer to be used while RadixSortGlobalScanHistogram
   * @param[in] bits Number of bits to be used for radix sort
   * @return N/A
   */
  void configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, int bits);

  /**
   * @brief Run CLRadixSortGlobalScanHistogram op
   * @param[in] window Window to be used for in_slice
   * @param[in] queue cl::CommandQueue
   * @return N/A
   */
  void run(const Window &window, cl::CommandQueue &queue) override;
};

/**
 * @brief Class to define CLRadixSortPasteHistogram
 */
class CLRadixSortPasteHistogram : public ICLKernel
{
public:
  /**
   * @brief Constructor
   */
  CLRadixSortPasteHistogram();
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied
   */
  CLRadixSortPasteHistogram(const CLRadixSortPasteHistogram &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied
   * @return Reference of this instance
   */
  CLRadixSortPasteHistogram &operator=(const CLRadixSortPasteHistogram &) = delete;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved
   */
  CLRadixSortPasteHistogram(CLRadixSortPasteHistogram &&) = default;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved
   * @return Reference of this instance
   */
  CLRadixSortPasteHistogram &operator=(CLRadixSortPasteHistogram &&) = default;

  /**
   * @brief Initialise kernel with params
   * @param[out] hist_buf Buffer of histogram
   * @param[out] glob_sum_buf Buffer of global sum
   * @param[in] bits Number of bits to be used for radix sort
   * @return N/A
   */
  void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);

  /**
   * @brief Run CLRadixSortPasteHistogram op
   * @param[in] window Window to be used for in_slice
   * @param[in] queue cl::CommandQueue
   * @return N/A
   */
  void run(const Window &window, cl::CommandQueue &queue) override;
};

/**
 * @brief Class to define CLRadixSortReorder
 */
class CLRadixSortReorder : public ICLKernel
{
public:
  /**
   * @brief Constructor
   */
  CLRadixSortReorder();
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied
   */
  CLRadixSortReorder(const CLRadixSortReorder &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers).
   * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied
   * @return Reference of this instance
   */
  CLRadixSortReorder &operator=(const CLRadixSortReorder &) = delete;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved
   */
  CLRadixSortReorder(CLRadixSortReorder &&) = default;
  /**
   * @brief Allow instances of this class to be moved
   * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved
   * @return Reference of this instance
   */
  CLRadixSortReorder &operator=(CLRadixSortReorder &&) = default;

  /**
   * @brief Initialise kernel with params
   * @param[out] hist_buf Buffer of histogram
   * @param[in] bits Number of bits to be used for radix sort
   * @param[in] n Integer number size to sort
   * @return N/A
   */
  void configure(cl::Buffer *hist_buf, int bits, int n);

  /**
   * @brief Set pass
   * @param[in] pass Passes made of in radix sort algorithm
   * @param[in] in_key_buf Buffer of input key
   * @param[out] out_key_buf Buffer of output key
   * @param[in] in_ind_buf Buffer of input index
   * @param[out] out_ind_buf Buffer of output index
   * @return N/A
   */
  void setPass(int pass, cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
               cl::Buffer *out_ind_buf)
  {
    _pass = pass;
    _in_key_buf = in_key_buf;
    _out_key_buf = out_key_buf;
    _in_ind_buf = in_ind_buf;
    _out_ind_buf = out_ind_buf;
  }
  /**
   * @brief Run CLRadixSortReorder op
   * @param[in] window Window to be used for in_slice
   * @param[in] queue cl::CommandQueue
   * @return N/A
   */
  void run(const Window &window, cl::CommandQueue &queue) override;

private:
  int _pass; /**< Current radix-sort pass */
  cl::Buffer
*_in_key_buf; + cl::Buffer *_out_key_buf; + cl::Buffer *_in_ind_buf; + cl::Buffer *_out_ind_buf; +}; + +/** + * @brief Class to define CLTopKV2FindFirstNegative + */ +class CLTopKV2FindFirstNegative : public ICLKernel +{ +public: + /** + * @brief Constructor + */ + CLTopKV2FindFirstNegative(); + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied + */ + CLTopKV2FindFirstNegative(const CLTopKV2FindFirstNegative &) = delete; + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied + * @return Reference of this instance + */ + CLTopKV2FindFirstNegative &operator=(const CLTopKV2FindFirstNegative &) = delete; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved + */ + CLTopKV2FindFirstNegative(CLTopKV2FindFirstNegative &&) = default; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved + * @return Reference of this instance + */ + CLTopKV2FindFirstNegative &operator=(CLTopKV2FindFirstNegative &&) = default; + + /** + * @brief Initialise kernel with params + * @param[out] first_negative_idx_buf Buffer of the first negative index + * @param[in] n Number times to find + * return N/A + */ + void configure(cl::Buffer *first_negative_idx_buf, int n); + + /** + * @brief Set output buffer + * @param[out] out_key_buf Buffer of output key + * return N/A + */ + void setOutputBuffer(cl::Buffer *out_key_buf) { _out_key_buf = out_key_buf; } + + /* + * @brief Run CLTopKV2FindFirstNegative op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ + void run(const Window &window, 
cl::CommandQueue &queue) override; + +private: + cl::Buffer *_out_key_buf; +}; + +/** + * @brief Class to define CLTopKV2ReorderNegatives + */ +class CLTopKV2ReorderNegatives : public ICLKernel +{ +public: + /** + * @brief Constructor + */ + CLTopKV2ReorderNegatives(); + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied + */ + CLTopKV2ReorderNegatives(const CLTopKV2ReorderNegatives &) = delete; + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied + * @return Reference of this instance + */ + CLTopKV2ReorderNegatives &operator=(const CLTopKV2ReorderNegatives &) = delete; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved + */ + CLTopKV2ReorderNegatives(CLTopKV2ReorderNegatives &&) = default; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved + * @return Reference of this instance + */ + CLTopKV2ReorderNegatives &operator=(CLTopKV2ReorderNegatives &&) = default; + + /** + * @brief Initialise kernel with params + * @param[out] first_negative_idx_buf Buffer of the first negative index + * @param[in] n Number times to find + * return N/A + */ + void configure(cl::Buffer *first_negative_idx_buf, int n); + + /** + * @brief Set buffers + * @param[in] in_key_buf Buffer of input key + * @param[out] out_key_buf Buffer of output key + * @param[in] in_ind_buf Buffer of input index + * @param[out] out_ind_buf Buffer of output index + * return N/A + */ + void setBuffers(cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf, + cl::Buffer *out_ind_buf) + { + _in_key_buf = in_key_buf; + _out_key_buf = 
out_key_buf; + _in_ind_buf = in_ind_buf; + _out_ind_buf = out_ind_buf; + } + + /* + * @brief Run CLTopKV2ReorderNegatives op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + cl::Buffer *_in_key_buf; + cl::Buffer *_out_key_buf; + cl::Buffer *_in_ind_buf; + cl::Buffer *_out_ind_buf; +}; + +/** + * @brief Class to define CLTopKV2Store + */ +class CLTopKV2Store : public ICLKernel +{ +public: + /** + * @brief Constructor + */ + CLTopKV2Store(); + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied + */ + CLTopKV2Store(const CLTopKV2Store &) = delete; + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers). + * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied + * @return Reference of this instance + */ + CLTopKV2Store &operator=(const CLTopKV2Store &) = delete; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved + */ + CLTopKV2Store(CLTopKV2Store &&) = default; + /** + * @brief Allow instances of this class to be moved + * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved + * @return Reference of this instance + */ + CLTopKV2Store &operator=(CLTopKV2Store &&) = default; + + /** + * @brief Initialise kernel with params + * @param[out] values Values tensor to store + * @param[out] indices Indices tensor to be used for store + * @param[in] k K of the top k predictions + * @param[in] n Number times to store + * return N/A + */ + void configure(ICLTensor *values, ICLTensor *indices, int k, int n); + + /** + * @brief Set buffers + * @param[out] out_key_buf Buffer of output key + * @param[out] out_ind_buf Buffer of output index + * return N/A 
+ */ + void setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf); + + /* + * @brief Run CLTopKV2Store op + * @param[in] window Window to be used for in_slice + * @param[in] queue cl::CommandQueue + * @return N/A + */ + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_values; + ICLTensor *_indices; + cl::Buffer *_out_key_buf; + cl::Buffer *_out_ind_buf; +}; + +} // namespace arm_compute +#endif // Disable GPU implementation +#endif // __ARM_COMPUTE_CLTOPKV2KERNEL_H__ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h new file mode 100644 index 000000000..c5ef730b6 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ +#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL. 
+ */ +class CLTransposeConvLayerUpsampleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLTransposeConvLayerUpsampleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayerUpsampleKernel & + operator=(const CLTransposeConvLayerUpsampleKernel &) = delete; + /** Default Move Constructor. */ + CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default; + /** Default move assignment operator */ + CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default; + /** Default destructor */ + ~CLTransposeConvLayerUpsampleKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. All but + * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only + * performed within the XY-plane. + * @param[in] inner_border Top and right inner border sizes. These rows and columns will be + * filled with zero. + * @param[in] info Contains padding and stride information described in @ref + * PadStrideInfo. + */ + void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, + const PadStrideInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLTransposeConvLayerUpsample + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] output Destination tensor info. Data types supported: same as @p input. All + * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is + * only performed within the XY-plane. 
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled + * with zero. + * @param[in] info Contains padding and stride information described in @ref + * PadStrideInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const BorderSize &inner_border, const PadStrideInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + BorderSize _inner_border; + PadStrideInfo _info; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h new file mode 100644 index 000000000..d093c22cb --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ +#define __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ + +#include "arm_compute/core/CPP/ICPPKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** CPP kernel to perform tensor upsample. + * + */ +class CPPUpsampleKernelEx : public ICPPKernel +{ +public: + const char *name() const override { return "CPPUpsampleKernelEx"; } + /** Default constructor */ + CPPUpsampleKernelEx(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPUpsampleKernelEx(const CPPUpsampleKernelEx &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPUpsampleKernelEx &operator=(const CPPUpsampleKernelEx &) = delete; + /** Allow instances of this class to be moved */ + CPPUpsampleKernelEx(CPPUpsampleKernelEx &&) = default; + /** Allow instances of this class to be moved */ + CPPUpsampleKernelEx &operator=(CPPUpsampleKernelEx &&) = default; + /** Default destructor */ + ~CPPUpsampleKernelEx() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8 + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] info Padding info. 
+ */ + void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + bool is_parallelisable() const override; + +private: + const ITensor *_input; + ITensor *_output; + PadStrideInfo _info; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h new file mode 100644 index 000000000..358e0ebc6 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__ +#define __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__ + +#include <arm_neon.h> + +namespace arm_compute +{ +class ITensor; +class Window; +class QuantizationInfo; +} // namespace arm_compute + +namespace arm_compute +{ + +float32x4x4_t load_quantized(const uint8_t *input1_ptr, const int32x4_t &offset, + const float32x4_t &scale); + +void store_quantized(uint8_t *output_ptr, const float32x4x4_t &rf, const float32x4_t &offset, + const float32x4_t &invscale); + +float32x4x4_t dup_quantized(uint8_t broadcast_value, int offset, float scale); + +void elementwise_op_quantized( + const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, + uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo), + int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t, + float32x4_t, float32x4_t, float32x4_t, const bool), + int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *, int32x4_t, + int32x4_t, float32x4_t, float32x4_t, float32x4_t, float32x4_t)); + +void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, + float (*scalar_func)(const float &, const float &), + int (*broadcast_func)(int, int, int, const float *, const float &, float *, + const bool), + int (*neon_func)(int, int, int, const float *, const float *, float *)); + +void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, + uint8_t (*scalar_func)(const uint8_t &, const uint8_t &), + int (*broadcast_func)(int, int, int, const uint8_t *, const uint8_t &, + uint8_t *, const bool), + int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *)); +} // namespace arm_compute +#endif // __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h 
b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h new file mode 100644 index 000000000..61992bd50 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
+#define __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
+
+#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+
+class NEBinaryLogicalOperationKernel : public NEElementwiseOperationKernel
+{
+public:
+  /** Default destructor */
+  ~NEBinaryLogicalOperationKernel() = default;
+
+  /** Initialise the kernel's inputs and output with the given binary logical
+   * operation
+   *
+   * @param[in] op Binary logical operation to be executed.
+   * @param[in] input1 First tensor input. Data types supported: QASYMM8/U8.
+   * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+   * @param[out] output Output tensor. Data types supported: Same as @p input1.
+   */
+  void configure(BinaryLogicalOperation op, const ITensor *input1, const ITensor *input2,
+                 ITensor *output);
+
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NEBinaryLogicalOperationKernel
+   *
+   * @param[in] op Binary logical operation to be executed.
+   * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8.
+   * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+   * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * + * @return a Status + */ + static Status validate(BinaryLogicalOperation op, const ITensorInfo *input1, + const ITensorInfo *input2, const ITensorInfo *output); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, + const ITensorInfo &output); +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h new file mode 100644 index 000000000..fd2a2ee3b --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NECASTKERNEL_H__ +#define __ARM_COMPUTE_NECASTKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the cast layer kernel. */ +class NECastKernel : public INEKernel +{ +public: + const char *name() const override { return "NECastKernel"; } + /** Default constructor */ + NECastKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECastKernel(const NECastKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECastKernel &operator=(const NECastKernel &) = delete; + /** Default Move Constructor. */ + NECastKernel(NECastKernel &&) = default; + /** Default move assignment operator */ + NECastKernel &operator=(NECastKernel &&) = default; + /** Default destructor */ + ~NECastKernel() = default; + /** Set input, output tensors. + * + * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U32/S32/F32. + * @param[out] output Destination tensor with the same dimensions of input. Data type supported: + * U8/S8/QASYMM8/U32/S32/F32. + * @param[in] input_subtype Sub data type of input. + */ + void configure(const ITensor *input, ITensor *output, SubDataType input_subtype); + /** Static function to check if given info will lead to a valid configuration of @ref NECastKernel + * + * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32. + * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32. + * @param[in] input_subtype Sub data type of input. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + SubDataType input_subtype); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + SubDataType _input_subtype; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_NECASTKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h new file mode 100644 index 000000000..5b6ef6bfb --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
+#define __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the depth to space kernel */
+class NEDepthToSpaceLayerKernelEx : public INEKernel
+{
+public:
+  const char *name() const override { return "NEDepthToSpaceLayerKernelEx"; }
+  /** Default constructor */
+  NEDepthToSpaceLayerKernelEx();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NEDepthToSpaceLayerKernelEx(const NEDepthToSpaceLayerKernelEx &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NEDepthToSpaceLayerKernelEx &operator=(const NEDepthToSpaceLayerKernelEx &) = delete;
+  /** Allow instances of this class to be moved */
+  NEDepthToSpaceLayerKernelEx(NEDepthToSpaceLayerKernelEx &&) = default;
+  /** Allow instances of this class to be moved */
+  NEDepthToSpaceLayerKernelEx &operator=(NEDepthToSpaceLayerKernelEx &&) = default;
+  /** Default destructor */
+  ~NEDepthToSpaceLayerKernelEx() = default;
+  /** Initialise the kernel's inputs and output.
+   *
+   * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
+   * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+   * @param[out] output Tensor output. Data types supported: same as @p input
+   * @param[in] block_shape Block shape value.
+   */
+  void configure(const ITensor *input, ITensor *output, int32_t block_shape);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NEDepthToSpaceLayerKernelEx.
+   *
+   * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
+   * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+   * @param[in] output Tensor output info. Data types supported: same as @p input
+   * @param[in] block_shape Block shape value.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ + int32_t _block_shape; /**< Block shape */ +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h new file mode 100644 index 000000000..d6fad1155 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ +#define __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for an element-wise unary operation kernel + * + * Element-wise operation is computed by: + * @f[ output(x) = OP(input(x))@f] + * + */ +class NEElementwiseUnaryKernelEx : public INEKernel +{ +public: + const char *name() const override { return "NEElementwiseUnaryKernelEx"; } + /** Default constructor */ + NEElementwiseUnaryKernelEx(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryKernelEx(const NEElementwiseUnaryKernelEx &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEElementwiseUnaryKernelEx &operator=(const NEElementwiseUnaryKernelEx &) = delete; + /** Allow instances of this class to be moved */ + NEElementwiseUnaryKernelEx(NEElementwiseUnaryKernelEx &&) = default; + /** Allow instances of this class to be moved */ + NEElementwiseUnaryKernelEx &operator=(NEElementwiseUnaryKernelEx &&) = default; + /** Default destructor */ + ~NEElementwiseUnaryKernelEx() = default; + + /** Static function to check if given info will lead to a valid configuration of @ref + * NEElementwiseUnaryKernelEx + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input First tensor input. Data types supported: F16/F32/S32. + * @param[in] output Output tensor. Data types supported: Same as @p input. 
+ */ + void configure(ElementWiseUnaryEx op, const ITensor *input, ITensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref + * NEElementwiseUnaryKernelEx + * + * @param[in] op Arithmetic operation to be executed. + * @param[in] input First tensor input info. Data types supported: F16/F32/S32. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a Status + */ + static Status validate(ElementWiseUnaryEx op, const ITensorInfo *input, + const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + + /** Common signature for all the specialised arithmetic functions + * + * @param[in] input An input tensor. Data types supported: F16/F32/S32. + * @param[out] output The output tensor. Data types supported: Same as @p input. + * @param[in] window Region on which to execute the kernel. + */ + using ElementwiseUnaryFunction = void(const ITensor *input, ITensor *output, + const Window &window); + +protected: + // Inherited methods overridden: + static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output); + + /** Function to use for the particular tensor types passed to configure() */ + std::function<void(const ITensor *input, ITensor *output, const Window &window)> _function; + + const ITensor *_input; + ITensor *_output; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h new file mode 100644 index 000000000..1490e75f2 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ +#define __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform EmbeddingLookup operation */ +class NEEmbeddingLookupKernel : public INEKernel +{ +public: + const char *name() const override { return "NEEmbeddingLookupKernel"; } + /** Default constructor */ + NEEmbeddingLookupKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEEmbeddingLookupKernel(const NEEmbeddingLookupKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + NEEmbeddingLookupKernel &operator=(const NEEmbeddingLookupKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEmbeddingLookupKernel(NEEmbeddingLookupKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEmbeddingLookupKernel &operator=(NEEmbeddingLookupKernel &&) = default; + /** Initialize the kernel's input, output. + * + * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] lookups Lookups are 1D tensor that values are indices into the first dimension of + * input. + */ + void configure(const ITensor *input, ITensor *output, const ITensor *lookups); + /** Static function to check if given info will lead to a valid configuration of @ref + * NEEmbeddingLookupKernel + * + * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] output Destination tensor. Data types supported: same as @p input. + * @param[in] lookups Lookups info. Data types supported: S32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *lookups); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + const ITensor *_lookups; + ITensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h new file mode 100644 index 000000000..3fa9c6e9a --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_NEGATHERKERNELEX_H__ +#define __ARM_COMPUTE_NEGATHERKERNELEX_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform other operation on NEON */ +class NEGatherKernelEx : public INEKernel +{ +public: + /** Default constructor. */ + NEGatherKernelEx(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEGatherKernelEx(const NEGatherKernelEx &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEGatherKernelEx &operator=(const NEGatherKernelEx &) = delete; + /** Allow instances of this class to be moved. */ + NEGatherKernelEx(NEGatherKernelEx &&) = default; + /** Allow instances of this class to be moved. 
*/ + NEGatherKernelEx &operator=(NEGatherKernelEx &&) = default; + /** Default destructor */ + ~NEGatherKernelEx() = default; + + /** Name of the kernel + * + * @return Kernel name + */ + const char *name() const override { return "NEGatherKernelEx"; } + /** Initialise the kernel's inputs and outputs + * + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: + * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the + * following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) + * @param[out] output Destination tensor. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values + * wrap around. Defaults to 0 + */ + void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); + /** Static function to check if given info will lead to a valid configuration of @ref + * NEGatherKernelEx + * + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: + * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the + * following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values + * wrap around. Defaults to 0 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, + const ITensorInfo *output, int axis); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Implementation of the gather operation for 0 axis. + * + * For gather on the 0 axis an element by element copy is performed.
+ * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window + * returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + template <typename U> void gather_0_axis(const Window &window, const ThreadInfo &info); + + /** Implementation of the gather operation. + * + * For 1<=axis a row-wise copy is taking place. + * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window + * returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + template <typename U> void gather_n_axis(const Window &window, const ThreadInfo &info); + + using kernel_ptr = void (NEGatherKernelEx::*)(const Window &window, const ThreadInfo &info); + + const ITensor *_input; + const ITensor *_indices; + int _axis; + ITensor *_output; + kernel_ptr _func; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEGATHERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h new file mode 100644 index 000000000..d8976e7d0 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__ +#define __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HashtableLookup operation */ +class NEHashtableLookupKernel : public INEKernel +{ +public: + const char *name() const override { return "NEHashtableLookupKernel"; } + /** Default constructor */ + NEHashtableLookupKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + NEHashtableLookupKernel(const NEHashtableLookupKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + NEHashtableLookupKernel &operator=(const NEHashtableLookupKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHashtableLookupKernel(NEHashtableLookupKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHashtableLookupKernel &operator=(NEHashtableLookupKernel &&) = default; + /** Initialize the kernel's inputs, outputs. + * + * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of + * input. Data types supported: S32 + * @param[in] keys Keys 1D tensor. keys and input pair represent a map. + * Data types supported: S32 + * @param[in] input Source tensor. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p + * input. + * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits + * (True) or not (False). Data types supported: U8/QASYMM8 + */ + void configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, ITensor *output, + ITensor *hits); + /** Static function to check if given info will lead to a valid configuration of @ref + * NEHashtableLookupKernel + * + * @param[in] lookups The lookups tensor info. Data types supported: S32. + * @param[in] keys The keys tensor info. keys and input pair represent a map. + * Data types supported: S32 + * @param[in] input The input tensor info. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output The output tensor info. Data types and data layouts supported: Same as @p + * input. + * @param[out] hits The hits tensor info. A boolean tensor that indicates whether the lookup + * hits (True) or not (False).
Data types supported: U8/QASYMM8 + * + * @return a status + */ + static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys, + const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *hits); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_lookups; /** Lookups tensor */ + const ITensor *_keys; /** Keys tensor */ + const ITensor *_input; /** Source tensor */ + ITensor *_output; /** Destination tensor */ + ITensor *_hits; /** Hits tensor */ +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h new file mode 100644 index 000000000..76e2587af --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__ +#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for performing an instance normalization */ +class NEInstanceNormalizationLayerKernelEx : public INEKernel +{ +public: + const char *name() const override { return "NEInstanceNormalizationLayerKernelEx"; } + /** Default constructor */ + NEInstanceNormalizationLayerKernelEx(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayerKernelEx(const NEInstanceNormalizationLayerKernelEx &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayerKernelEx & + operator=(const NEInstanceNormalizationLayerKernelEx &) = delete; + /** Allow instances of this class to be moved */ + NEInstanceNormalizationLayerKernelEx(NEInstanceNormalizationLayerKernelEx &&) = default; + /** Allow instances of this class to be moved */ + NEInstanceNormalizationLayerKernelEx & + operator=(NEInstanceNormalizationLayerKernelEx &&) = default; + /** Default destructor */ + ~NEInstanceNormalizationLayerKernelEx() = default; + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: + * NCHW + * In case of @p output tensor = nullptr this tensor will store the result + * of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p + * input. 
+ * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. + * Defaults to 1.0 + * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. + * Defaults to 0.0 + * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 + */ + void configure(ITensor *input, ITensor *output, ITensor *gamma = nullptr, ITensor *beta = nullptr, + float epsilon = 1e-12f); + + /** Static function to check if given info will lead to a valid configuration of @ref + * NEInstanceNormalizationLayer. + * + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: + * NCHW + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p + * input. + * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults + * to 1.0 + * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. + * Defaults to 0.0 + * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr, + float epsilon = 1e-12f); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + /** Common signature for all the specialized instance normalization functions + * + * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will + * store the result of the normalization. + * @param[out] output The output tensor. + * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to + * 1.0 + * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to + * 0.0 + * @param[in] epsilon Lower bound value for the normalization. 
Defaults to 1e-12 + */ + using NormalizationFunction = void(ITensor *input, ITensor *output, ITensor *gamma, ITensor *beta, + float epsilon, const Window &window); + + NormalizationFunction *_func; + ITensor *_input; + ITensor *_output; + ITensor *_gamma; + ITensor *_beta; + float _epsilon; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h new file mode 100644 index 000000000..723b14523 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__ +#define __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface to multiply scale factor kernel. */ +class NEMultiplyScaleFactorKernel : public INEKernel +{ +public: + const char *name() const override { return "NEMultiplyScaleFactorKernel"; } + /** Default constructor */ + NEMultiplyScaleFactorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMultiplyScaleFactorKernel(const NEMultiplyScaleFactorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMultiplyScaleFactorKernel &operator=(const NEMultiplyScaleFactorKernel &) = delete; + /** Default Move Constructor. */ + NEMultiplyScaleFactorKernel(NEMultiplyScaleFactorKernel &&) = default; + /** Default move assignment operator */ + NEMultiplyScaleFactorKernel &operator=(NEMultiplyScaleFactorKernel &&) = default; + /** Default destructor */ + ~NEMultiplyScaleFactorKernel() = default; + /** Set input, output tensors. + * + * @param[in] input Source tensor. Data type supported: S32. + * @param[in] scale_factor Scale tensor. Data type supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: Same as @p scale_factor. + * @param[in] multiplier (Optional) Scale multiplier applied to the result. Defaults to 1.f. + */ + void configure(const ITensor *input, const ITensor *scale_factor, ITensor *output, + float multiplier = 1.f); + /** Static function to check if given info will lead to a valid configuration of @ref + * NEMultiplyScaleFactorKernel + * + * @param[in] input Input tensor info. Data types supported: S32. + * @param[in] scale_factor Scale tensor. Data type supported: F16/F32. + * @param[in] output Output tensor info. Data types supported: Same as @p scale_factor. + * @param[in] multiplier (Optional) Scale multiplier applied to the result. Defaults to 1.f.
+ * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *scale_factor, + const ITensorInfo *output, float multiplier = 1.f); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + template <typename T> void multiply(const Window &window); + +private: + const ITensor *_input; + const ITensor *_scale_factor; + ITensor *_output; + float _multiplier; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h new file mode 100644 index 000000000..79bb78661 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEPRELUKERNEL_H__ +#define __ARM_COMPUTE_NEPRELUKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform Parametric Rectified Linear Unit + * + * Result is computed by: + * @f[ output(x) = alpha * x for x < 0, output(x) = x for x >= 0 @f] + */ +class NEPReLUKernel : public INEKernel +{ +public: + const char *name() const override { return "NEPReLUKernel"; } + /** Default constructor */ + NEPReLUKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPReLUKernel(const NEPReLUKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPReLUKernel &operator=(const NEPReLUKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPReLUKernel(NEPReLUKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPReLUKernel &operator=(NEPReLUKernel &&) = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data type supported: QASYMM8/F32 + * @param[in] alpha Alpha tensor. Data types supported: Same as @p input + * @param[out] output Output tensor. Data types supported: Same as @p input + */ + void configure(const ITensor *input, const ITensor *alpha, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + + /** Static function to check if given info will lead to a valid configuration of @ref + * NEPReLUKernel.h + * + * @param[in] input Input tensor input info. Data types supported: QASYMM8/F32. + * @param[in] alpha Alpha tensor input info. 
Data types supported: Same as @p input. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, + const ITensorInfo *output); + static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &alpha, + const ITensorInfo &output); + +private: + const ITensor *_input; /**< Source tensor */ + const ITensor *_alpha; /**< Alpha tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEPRELUKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h new file mode 100644 index 000000000..590b23873 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
+#define __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the symmetric quantization layer kernel (F16/F32 input -> S8 output plus a
+ *  scale-factor tensor). */
+class NEQuantizationSymmetricKernel : public INEKernel
+{
+public:
+  const char *name() const override { return "NEQuantizationSymmetricKernel"; }
+  /** Default constructor */
+  NEQuantizationSymmetricKernel();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NEQuantizationSymmetricKernel(const NEQuantizationSymmetricKernel &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NEQuantizationSymmetricKernel &operator=(const NEQuantizationSymmetricKernel &) = delete;
+  /** Default Move Constructor. */
+  NEQuantizationSymmetricKernel(NEQuantizationSymmetricKernel &&) = default;
+  /** Default move assignment operator */
+  NEQuantizationSymmetricKernel &operator=(NEQuantizationSymmetricKernel &&) = default;
+  /** Default destructor */
+  ~NEQuantizationSymmetricKernel() = default;
+  /** Set input, output tensors.
+   *
+   * @param[in]  input        Source tensor. Data type supported: F16/F32.
+   * @param[out] output       Destination tensor with the same dimensions of input. Data type
+   *                          supported: S8.
+   * @param[out] scale_factor Scale tensor of @p output. Data type supported: Same as @p input.
+   */
+  void configure(const ITensor *input, ITensor *output, ITensor *scale_factor);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NEQuantizationSymmetricKernel
+   *
+   * @param[in] input        Input tensor info. Data types supported: F16/F32.
+   * @param[in] output       Output tensor info. Data types supported: S8.
+   * @param[in] scale_factor Scale tensor info of @p output. Data type supported: Same as
+   *                         @p input. (doc fixed: this is a validation input, not an output)
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+                         const ITensorInfo *scale_factor);
+
+  // Inherited methods overridden:
+  void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+  /** Per-element quantization loop, templated on the input data type. */
+  template <typename T> void quantize(const Window &window);
+
+private:
+  const ITensor *_input;   /**< Source tensor */
+  ITensor *_output;        /**< Destination (quantized) tensor */
+  ITensor *_scale_factor;  /**< Scale factors written for @p _output */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
new file mode 100644
index 000000000..73991b67d
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__
+#define __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a reduction operation (see @ref ReduceOperation) */
+class NEReductionOperationKernelEx : public INEKernel
+{
+public:
+  const char *name() const override { return "NEReductionOperationKernelEx"; }
+  /** Default constructor */
+  NEReductionOperationKernelEx();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NEReductionOperationKernelEx(const NEReductionOperationKernelEx &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NEReductionOperationKernelEx &operator=(const NEReductionOperationKernelEx &) = delete;
+  /** Allow instances of this class to be moved */
+  NEReductionOperationKernelEx(NEReductionOperationKernelEx &&) = default;
+  /** Allow instances of this class to be moved */
+  NEReductionOperationKernelEx &operator=(NEReductionOperationKernelEx &&) = default;
+  /** Default destructor */
+  ~NEReductionOperationKernelEx() = default;
+
+  /** Set the source, destination of the kernel
+   *
+   * @param[in]  input  Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts
+   *                    supported: NCHW.
+   * @param[out] output Destination tensor. Data types and data layouts supported: same as
+   *                    @p input. Output will have the same number of dimensions as input.
+   * @param[in]  axis   Axis along which to reduce. Supported reduction axis : 0
+   * @param[in]  op     Reduction operation to perform.
+   */
+  void configure(const ITensor *input, ITensor *output, unsigned int axis, ReduceOperation op);
+
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NEReductionOperationKernelEx.
+   *
+   * @param[in] input  Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts
+   *                   supported: NCHW.
+   * @param[in] output Destination tensor info. Data types and data layouts supported: same as
+   *                   @p input. Output will have the same number of dimensions as input.
+   * @param[in] axis   Axis along which to reduce. Supported reduction axis : 0
+   * @param[in] op     Reduction operation to perform.
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis,
+                         ReduceOperation op);
+
+  // Inherited methods overridden:
+  void run(const Window &window, const ThreadInfo &info) override;
+  BorderSize border_size() const override;
+
+private:
+  const ITensor *_input;        /**< Source tensor */
+  ITensor *_output;             /**< Destination tensor */
+  unsigned int _reduction_axis; /**< Axis being reduced */
+  ReduceOperation _op;          /**< Reduction operation to perform */
+  BorderSize _border_size;      /**< Border size required by this kernel */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h
new file mode 100644
index 000000000..5d697c2b2
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2019 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
+#define __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the space to depth kernel */
+class NESpaceToDepthLayerKernelEx : public INEKernel
+{
+public:
+  const char *name() const override { return "NESpaceToDepthLayerKernelEx"; }
+  /** Default constructor */
+  NESpaceToDepthLayerKernelEx();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NESpaceToDepthLayerKernelEx(const NESpaceToDepthLayerKernelEx &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NESpaceToDepthLayerKernelEx &operator=(const NESpaceToDepthLayerKernelEx &) = delete;
+  /** Allow instances of this class to be moved */
+  NESpaceToDepthLayerKernelEx(NESpaceToDepthLayerKernelEx &&) = default;
+  /** Allow instances of this class to be moved */
+  NESpaceToDepthLayerKernelEx &operator=(NESpaceToDepthLayerKernelEx &&) = default;
+  /** Default destructor */
+  ~NESpaceToDepthLayerKernelEx() = default;
+  /** Initialise the kernel's inputs and output.
+   *
+   * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported:
+   *                         U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+   * @param[out] output      Tensor output. Data types supported: same as @p input
+   * @param[in]  block_shape Block shape value
+   */
+  void configure(const ITensor *input, ITensor *output, int32_t block_shape);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NESpaceToDepthLayerKernelEx
+   *
+   * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported:
+   *                        U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+   * @param[in] output      Tensor output info. Data types supported: same as @p input
+   * @param[in] block_shape Block shape value
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+  // Inherited methods overridden:
+  void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+  const ITensor *_input; /**< Source tensor */
+  ITensor *_output;      /**< Destination tensor */
+  int32_t _block_shape;  /**< Block shape */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/TypesEx.h b/compute/ARMComputeEx/arm_compute/core/TypesEx.h
new file mode 100644
index 000000000..3b0902f08
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/TypesEx.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_TYPESEX_H__
+#define __ARM_COMPUTE_TYPESEX_H__
+
+namespace arm_compute
+{
+
+/** Available ArgIndex operations */
+enum class ArgOperation
+{
+  MAX,
+  MIN,
+};
+
+/** Available reduce operations */
+enum class ReduceOperation
+{
+  MAX,  /**< Max */
+  MEAN, /**< Mean */
+  SUM,  /**< Sum */
+  MIN,  /**< Min */
+};
+
+/** Available binary logical operations */
+enum class BinaryLogicalOperation
+{
+  AND, /**< AND */
+  OR,  /**< OR */
+};
+
+/** Available extended comparison operations */
+enum class ComparisonOperationEx
+{
+  EQUAL,     /**< EQUAL */
+  NOT_EQUAL, /**< NOT_EQUAL */
+};
+
+/** Available extended element-wise unary operations */
+enum class ElementWiseUnaryEx
+{
+  NEG, /**< NEG */
+};
+
+/** Available sub data types */
+enum class SubDataType
+{
+  NONE, /**< NONE */
+  BOOL, /**< BOOL */
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TYPESEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/UtilsEx.h b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
new file mode 100644
index 000000000..39026e6bb
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_UTILSEX_H__
+#define __ARM_COMPUTE_UTILSEX_H__
+
+#include <utility>
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+
+/** Returns expected width and height of the transpose convolution's output tensor.
+ *
+ * @note This function was copied in order to fix a bug computing to wrong output dimensions.
+ *
+ * @param[in] in_width      Width of input tensor (Number of columns)
+ * @param[in] in_height     Height of input tensor (Number of rows)
+ * @param[in] kernel_width  Kernel width.
+ * @param[in] kernel_height Kernel height.
+ * @param[in] info          Padding and stride info.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_top   The number of zeros added to bottom edge of the output.
+ *                          (NOTE(review): the parameter name says "top" but the description
+ *                          says "bottom" - confirm against the implementation before use)
+ *
+ * @return A pair with the new width in the first position and the new height in the second.
+ */
+const std::pair<unsigned int, unsigned int>
+transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height,
+                                unsigned int kernel_width, unsigned int kernel_height,
+                                const PadStrideInfo &info, unsigned int invalid_right,
+                                unsigned int invalid_top);
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_UTILSEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
new file mode 100644
index 000000000..16fd40ed9
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ +#define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/Utils.h" + +#include "arm_compute/core/utils/helpers/tensor_transform.h" + +#include <cmath> + +namespace arm_compute +{ +namespace misc +{ +namespace shape_calculator +{ + +/** Calculate the upsampled output shape used for transpose convolution + * + * @param[in] input Input tensor info + * @param[in] weights Weights tensor shape + * @param[in] info Padding and stride info + * @param[in] out_dims Output shape dimensions + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[out] pad_left Padding on left + * @param[out] pad_right Padding on right + * @param[out] pad_top Padding on top + * @param[out] pad_bottom Padding on bottom + * + * @return the calculated shape + */ +inline TensorShape compute_transposeconv_upsampled_shape( + const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info, + std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right, + unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right, + unsigned int &pad_top, unsigned int &pad_bottom) +{ + unsigned int sx = info.stride().first; + unsigned int sy = info.stride().second; + const DataLayout data_layout = input.data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + // Find the upsampled dimensions + // transpose conv out: + // tconv_out + pad = 1 + (in - 1) * stride + invalid + // tconv_out = 1 + (in - 1) * stride + invalid - pad + // upsample out: + // upsample_out = 1 + (in - 1) * stride + unsigned int out_x = (input.dimension(idx_w) - 1) 
* sx + 1; + unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1; + + // Find the padding needed for the convolution with stride 1 in order to match output shape + // upsample+pad out: + // upsample_out + pad = tconv_out + kernel - 1 + // pad = tconv_out + kernel - 1 - upsample_out + unsigned int padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1); + unsigned int pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1); + out_x += padx; + out_y += pady; + + unsigned int padx_all_except_invallid = padx + info.pad_left() + info.pad_right() - invalid_right; + unsigned int pady_all_except_invallid = + pady + info.pad_top() + info.pad_bottom() - invalid_bottom; + pad_left = (padx_all_except_invallid + 1) / 2 - info.pad_left(); + pad_right = pady_all_except_invallid / 2 - info.pad_right() + invalid_right; + pad_top = (padx_all_except_invallid + 1) / 2 - info.pad_top(); + pad_bottom = pady_all_except_invallid / 2 - info.pad_bottom() + invalid_bottom; + + TensorShape scale_out_shape(input.tensor_shape()); + scale_out_shape.set(idx_w, out_x); + scale_out_shape.set(idx_h, out_y); + + return scale_out_shape; +} + +/** Calculate the output shape of the transpose convolution layer + * + * @param[in] out_dims Output x and y shape dimensions + * @param[in] input Input tensor info + * @param[in] weights Weights tensor shape + * + * @return the calculated shape + */ +inline TensorShape +compute_transposeconv_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, + const ITensorInfo &input, const ITensorInfo &weights) +{ + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; + + const DataLayout data_layout = input.data_layout(); + const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int channel_idx = + 
get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + + TensorShape out_shape{input_shape}; + out_shape.set(width_idx, out_dims.first); + out_shape.set(height_idx, out_dims.second); + out_shape.set(channel_idx, weights_shape[batch_idx]); + return out_shape; +} + +/** Calculate the depth to space output shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] block Block shape value + * + * @return the calculated shape + */ +inline TensorShape compute_depth_to_space_shape_ex(const ITensorInfo *input, int block) +{ + ARM_COMPUTE_ERROR_ON(block < 2); + + const DataLayout data_layout = input->data_layout(); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_channel = + get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + TensorShape output_shape{input->tensor_shape()}; + output_shape.set(idx_width, input->dimension(idx_width) * block); + output_shape.set(idx_height, input->dimension(idx_height) * block); + output_shape.set(idx_channel, input->dimension(idx_channel) / (block * block)); + + return output_shape; +} + +/** Calculate the space to batch output shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] block_shape Block shape value + * + * @return the calculated shape + */ +inline TensorShape compute_space_to_depth_shape_ex(const ITensorInfo *input, int32_t block_shape) +{ + ARM_COMPUTE_ERROR_ON(block_shape < 2); + TensorShape output_shape{input->tensor_shape()}; + + const DataLayout data_layout = input->data_layout(); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + 
const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_shape); + output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_shape); + output_shape.set(idx_depth, input->tensor_shape()[idx_depth] / (block_shape * block_shape)); + + return output_shape; +} + +/** Calculate the gather output shape of a tensor + * + * @param[in] input_shape Input tensor shape + * @param[in] indices_shape Indices tensor shape + * @param[in] actual_axis The axis to be gathered + * + * @return the calculated shape + */ +inline TensorShape compute_gather_shape_ex(const TensorShape &input_shape, + const TensorShape &indices_shape, uint32_t actual_axis) +{ + ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 3); + ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4); + ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() + indices_shape.num_dimensions() - 1 > 4); + ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions()); + + TensorShape output_shape = input_shape; + if (indices_shape.num_dimensions() == 1) + { + output_shape[actual_axis] = indices_shape[0]; + } + else if (indices_shape.num_dimensions() > 1) + { + output_shape.shift_right(indices_shape.num_dimensions() - 1); + + for (uint32_t i = 0, o = 0; o < output_shape.num_dimensions(); ++o, ++i) + { + if (o == actual_axis) + { + ++i; + for (uint32_t in = 0; in < indices_shape.num_dimensions(); ++in, ++o) + { + output_shape[o] = indices_shape[in]; + } + } + else + { + output_shape[o] = input_shape[i]; + } + } + } + return output_shape; +} + +} // namespace shape_calculator +} // namespace misc +} // namespace arm_compute + +#endif // __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ |