diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-10-29 13:12:50 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-10-29 13:12:50 +0900 |
commit | d6b371e095d737922187a518b8faba1ef6f3a2b1 (patch) | |
tree | 9d90c09c887b5111389dbedf924f59206411cd5a /compute/ARMComputeEx | |
parent | c55f8a6db48cda9d3a78048338b7f18c4cca62b8 (diff) | |
download | nnfw-d6b371e095d737922187a518b8faba1ef6f3a2b1.tar.gz nnfw-d6b371e095d737922187a518b8faba1ef6f3a2b1.tar.bz2 nnfw-d6b371e095d737922187a518b8faba1ef6f3a2b1.zip |
Imported Upstream version 0.4upstream/0.4
Diffstat (limited to 'compute/ARMComputeEx')
149 files changed, 0 insertions, 25060 deletions
diff --git a/compute/ARMComputeEx/CMakeLists.txt b/compute/ARMComputeEx/CMakeLists.txt deleted file mode 100644 index 58f558db2..000000000 --- a/compute/ARMComputeEx/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -nnfw_find_package(ARMCompute QUIET) - -if(NOT ARMCompute_FOUND) - message(STATUS "Check ARM Compute library extension build: need ARM Compute library") - return() -else(NOT ARMCompute_FOUND) - message(STATUS "Check ARM Compute library extension build: OK") -endif(NOT ARMCompute_FOUND) - -set(ACL_EX_BASE ${CMAKE_CURRENT_SOURCE_DIR}) - -file(GLOB_RECURSE ACL_EX_SRCS "${ACL_EX_BASE}/*.cpp") - -# generate embeded cl_kernel -execute_process ( - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - COMMAND bash -c "python resolve_includes.py" -) - -add_library(arm_compute_ex SHARED ${ACL_EX_SRCS}) -target_include_directories(arm_compute_ex PUBLIC ${ACL_EX_BASE}) -target_link_libraries(arm_compute_ex PRIVATE arm_compute) -target_link_libraries(arm_compute_ex PRIVATE nnfw_common) -target_link_libraries(arm_compute_ex PRIVATE nnfw_coverage) -# Defines to enable validate check in debug build -target_compile_definitions(arm_compute_ex PRIVATE EMBEDDED_KERNELS - $<$<CONFIG:Debug>:ARM_COMPUTE_DEBUG_ENABLED ARM_COMPUTE_ASSERTS_ENABLED - ARM_COMPUTE_LOGGING_ENABLED>) -# Validate check functions are not used on release build -# Some parameter are used for validate check function call, and these parameter may not used on release build -# Because clang requires to add "-Wno-unused-parameter -Wno-unused-function" after "-Wall", -# this should be after linking nnfw_common and use interface lib linking -add_library(ignore_unused_warning INTERFACE) -target_compile_options(ignore_unused_warning INTERFACE -Wno-unused-parameter -Wno-unused-function) -target_link_libraries(arm_compute_ex PRIVATE $<$<NOT:$<CONFIG:Debug>>:ignore_unused_warning>) -install(TARGETS arm_compute_ex DESTINATION lib) diff --git a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h 
b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h deleted file mode 100644 index d29886a9d..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLKernelLibraryEx.h - * @ingroup COM_AI_RUNTIME - * @brief This file is a cloned version of CLKernelLibrary.h in ACL. This file defines - * an interface for CLKernelLibrary.cpp which adds more OpenCL kernels on top of ACL. - */ - -#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ -#define __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ - -#include "arm_compute/core/CL/OpenCL.h" - -#include <map> -#include <set> -#include <string> -#include <utility> - -namespace arm_compute -{ - -/** - * @brief Class to build OpenCL kernels added from nnfw - * */ -class CLKernelLibraryEx -{ - using StringSet = std::set<std::string>; - -private: - /** - * @brief Construct a new CLKernelLibraryEx object - */ - CLKernelLibraryEx(); - -public: - /** - * @brief Prevent instances of this class from being copied. - */ - CLKernelLibraryEx(const CLKernelLibraryEx &) = delete; - - /** - * @brief Prevent instances of this class from being copied. - */ - const CLKernelLibraryEx &operator=(const CLKernelLibraryEx &) = delete; - - /** - * @brief Get the KernelLibrary singleton. - * @return The KernelLibrary instance - */ - static CLKernelLibraryEx &get(); - - /** - * @brief Initialise the kernel library. - * @param[in] kernel_path Path of the directory from which kernel sources are loaded. - * @param[in] context CL context used to create programs. - * @param[in] device CL device for which the programs are created. - * @return N/A - */ - void init(std::string kernel_path, cl::Context context, cl::Device device) - { - _kernel_path = std::move(kernel_path); - _context = std::move(context); - _device = std::move(device); - } - - /** - * @brief Set the path that the kernels reside in. 
- * @param[in] kernel_path Path of the directory from which kernel sources are loaded. - * @return N/A - */ - void set_kernel_path(const std::string &kernel_path) { _kernel_path = kernel_path; }; - - /** - * @brief Get the path that the kernels reside in. - * @return the path of kernel files - */ - std::string get_kernel_path() { return _kernel_path; }; - - /** - * @brief Get the source of the selected program. - * @param[in] program_name Program name. - * @return Source of the selected program. - */ - std::string get_program_source(const std::string &program_name); - - /** - * @brief Set the CL context used to create programs. - * @note Setting the context also resets the device to the - * first one available in the new context. - * @param[in] context A CL context. - * @return N/A - */ - void set_context(cl::Context context) - { - _context = std::move(context); - if (_context.get() == nullptr) - { - _device = cl::Device(); - } - else - { - const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>(); - - if (cl_devices.empty()) - { - _device = cl::Device(); - } - else - { - _device = cl_devices[0]; - } - } - } - - /** - * @brief Return associated CL context. - * @return A CL context. - */ - cl::Context &context() { return _context; } - - /** - * @brief Set the CL device for which the programs are created. - * @param[in] device A CL device. - * @return N/A - */ - void set_device(cl::Device device) { _device = std::move(device); } - - /** - * @brief Gets the CL device for which the programs are created. - * @return A CL device. - */ - cl::Device &get_device() { return _device; } - - /** - * @brief Return the device version - * @return The content of CL_DEVICE_VERSION - */ - std::string get_device_version(); - - /** - * @brief Create a kernel from the kernel library. - * @param[in] kernel_name Kernel name. - * @param[in] build_options_set Kernel build options as a set. - * @return The created kernel. 
- */ - Kernel create_kernel(const std::string &kernel_name, - const StringSet &build_options_set = {}) const; - - /** - * @brief Find the maximum number of local work items in a workgroup can be supported for the - * kernel. - * @param[in] kernel kernel object - */ - - size_t max_local_workgroup_size(const cl::Kernel &kernel) const; - /** - * @brief Return the default NDRange for the device. - * @return default NDRangeof the device - */ - cl::NDRange default_ndrange() const; - - /** - * @brief Clear the library's cache of binary programs - * @return N/A - */ - void clear_programs_cache() - { - _programs_map.clear(); - _built_programs_map.clear(); - } - - /** - * @brief Access the cache of built OpenCL programs - * @return program map data structure of which key is name of kernel and value is - * kerel source name. (*.cl) - */ - const std::map<std::string, cl::Program> &get_built_programs() const - { - return _built_programs_map; - } - - /** - * @brief Add a new built program to the cache - * @param[in] built_program_name Name of the program - * @param[in] program Built program to add to the cache - * @return N/A - */ - void add_built_program(const std::string &built_program_name, cl::Program program); - - /** - * @brief Returns true if FP16 is supported by the CL device - * @return true if the CL device supports FP16 - */ - bool fp16_supported() const; - - /** - * @brief Returns true if int64_base_atomics extension is supported by the CL device - * @return true if the CL device supports int64_base_atomics extension - */ - bool int64_base_atomics_supported() const; - -private: - /** - * @brief Load program and its dependencies. - * @param[in] program_name Name of the program to load. - */ - const Program &load_program(const std::string &program_name) const; - /** - * @brief Concatenates contents of a set into a single string. - * @param[in] s Input set to concatenate. - * @return Concatenated string. 
- */ - std::string stringify_set(const StringSet &s) const; - - cl::Context _context; /**< Underlying CL context. */ - cl::Device _device; /**< Underlying CL device. */ - std::string _kernel_path; /**< Path to the kernels folder. */ - mutable std::map<std::string, const Program> - _programs_map; /**< Map with all already loaded program data. */ - mutable std::map<std::string, cl::Program> - _built_programs_map; /**< Map with all already built program data. */ - static const std::map<std::string, std::string> - _kernel_program_map; /**< Map that associates kernel names with programs. */ - static const std::map<std::string, std::string> - _program_source_map; /**< Contains sources for all programs. - Used for compile-time kernel inclusion. >*/ -}; -} -#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h deleted file mode 100644 index a0aa0560b..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H -#define ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the reduction operation kernel - * - * @note The default data type for an uninitialized output tensor is - * signed 32-bit integer (S32). It is the user's responsibility to check - * that the results do not overflow because the indices are computed - * in unsigned 32-bit (U32). 
- */ -class CLArgMinMaxLayerKernelEx : public ICLKernel -{ -public: - /** Default constructor */ - CLArgMinMaxLayerKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLArgMinMaxLayerKernelEx(const CLArgMinMaxLayerKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLArgMinMaxLayerKernelEx &operator=(const CLArgMinMaxLayerKernelEx &) = delete; - /** Allow instances of this class to be moved */ - CLArgMinMaxLayerKernelEx(CLArgMinMaxLayerKernelEx &&) = default; - /** Allow instances of this class to be moved */ - CLArgMinMaxLayerKernelEx &operator=(CLArgMinMaxLayerKernelEx &&) = default; - /** Default destructor */ - ~CLArgMinMaxLayerKernelEx() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: S32/F16/F32. - * @param[in] prev_output Destination tensor of the previous iterations of @ref - * CLArgMinMaxLayerKernelEx. Data types supported: U32/S32 - * Has to be nullptr for the first iteration - * @param[out] output Destination tensor. Data types supported: U32/S32 - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. - */ - void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, - unsigned int axis, ReductionOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref - * CLArgMinMaxLayerKernelEx. - * - * @param[in] input Source tensor info. Data types supported: S32/F16/F32. - * @param[in] prev_output Destination tensor info of the previous iterations. Data types - * supported: U32/S32 - * Has to be nullptr for the first iteration - * @param[in] output Destination tensor info. 
Data types supported: U32/S32 - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output, - const ITensorInfo *output, unsigned int axis, ReductionOperation op); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_prev_output; - ICLTensor *_output; - unsigned int _reduction_axis; - ReductionOperation _op; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNELEX_H */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h deleted file mode 100644 index bb6fcb8f5..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ -#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to return truth values of two input tensors for Binary Logical Op*/ -class CLBinaryLogicalOpKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBinaryLogicalOpKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLBinaryLogicalOpKernel(const CLBinaryLogicalOpKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - CLBinaryLogicalOpKernel &operator=(const CLBinaryLogicalOpKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBinaryLogicalOpKernel(CLBinaryLogicalOpKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBinaryLogicalOpKernel &operator=(CLBinaryLogicalOpKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input1 Source tensor1. - * @param[in] input2 Source tensor2. - * @param[out] output Output tensor. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, - BinaryLogicalOperation op); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; - -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastBoolKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastBoolKernel.h deleted file mode 100644 index ed668fd9c..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastBoolKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLCastBoolKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLCastBoolKernel class - */ - -#ifndef __ARM_COMPUTE_CLCASTBOOLKERNEL_H__ -#define __ARM_COMPUTE_CLCASTBOOLKERNEL_H__ - -#include "arm_compute/core/CL/ICLSimple3DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class for the kernel converting boolean type - */ -class CLCastBoolKernel : public ICLSimple3DKernel -{ -public: - /** - * @brief Initialise the kernel's input and output. - * @param[in] input Input tensor. Data types supported: U8 - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. 
- * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref - * CLCastBoolKernel - * - * @param[in] input Source tensor info. Data types supported: U8. - * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLCASTBOOLKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h deleted file mode 100644 index a614d5259..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/** - * @file CLEmbeddingLookupKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLEmbeddingLookupKernel class - */ - -#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ -#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** -* @brief Class to perform EmbeddingLookup operation with opencl kernel -*/ -class CLEmbeddingLookupKernel : public ICLKernel -{ -public: - /** - * @brief Construct a CLEmbeddingLookupKernel object - * */ - CLEmbeddingLookupKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLEmbeddingLookupKernel(const CLEmbeddingLookupKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLEmbeddingLookupKernel &operator=(const CLEmbeddingLookupKernel &) = delete; - - /** - * @brief Construct a CLEmbeddingLookupKernel object by using default move constructor - * @param[in] CLEmbeddingLookupKernel object to move - * */ - CLEmbeddingLookupKernel(CLEmbeddingLookupKernel &&) = default; - - /** - * @brief Move assignment operator - * @param[in] CLEmbeddingLookupKernel object to move - * */ - CLEmbeddingLookupKernel &operator=(CLEmbeddingLookupKernel &&) = default; - - /** - * @brief Destruct this object - * */ - ~CLEmbeddingLookupKernel() = default; - - /** - * @brief Set the input and output of the kernel - * @param[in] input Source tensor. - * Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] lookups Lookups are 1D tensor that values are indices into the first - * dimension of input. - * Data types supported: S32. 
- * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLEmbeddingLookupKernel - * @param[in] input The input tensor info. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * @param[in] lookups Lookups info. Data types supported: S32. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *lookups); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /** Source tensor */ - ICLTensor *_output; /** Destination tensor */ - const ICLTensor *_lookups; /** Lookups tensor */ -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h deleted file mode 100644 index 6630c7be7..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherExKernel.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLGatherExKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLGatherExKernel class - */ - -#ifndef __ARM_COMPUTE_CLGATHEREXKERNEL_H__ -#define __ARM_COMPUTE_CLGATHEREXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define an interface for the gather kernel. 
- */ -class CLGatherExKernel : public ICLKernel -{ -public: - /** - * @brief Construct CLGatherExKernel object - * */ - CLGatherExKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - */ - CLGatherExKernel(const CLGatherExKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - */ - CLGatherExKernel &operator=(const CLGatherExKernel &) = delete; - - /** - * @brief Construct CLGatherExKernel object by using default move constructor - * @param[in] CLGatherExKernel object to move - */ - CLGatherExKernel(CLGatherExKernel &&) = default; - - /** - * @brief Move assignment operator - * @param[in] CLGatherExKernel object to move - */ - CLGatherExKernel &operator=(CLGatherExKernel &&) = default; - - /** - * @brief Initialise the kernel's input, output and border mode. - * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[in] indices Indices tensor. Data types supported: S32. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative - * values wrap around. Defaults to 0 - * @return N/A - */ - void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLGatherExKernel - * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[in] indices Indices tensor. Data types supported: S32. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative - * values wrap around. 
Defaults to 0 - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis = 0); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel. - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_indices; - ICLTensor *_output; - int _axis; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLGATHEREXKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h deleted file mode 100644 index 99cfa61ec..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/** - * @file CLHashtableLookupKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLHashtableLookupKernel class - */ - -#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ -#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" - -namespace arm_compute -{ -class ICLTensor; - -/** -* @brief Class to perform HashtableLookup operation with opencl kernel -*/ -class CLHashtableLookupKernel : public ICLKernel -{ -public: - /** - * @brief Construct a CLHashtableLookupKernel object - * */ - CLHashtableLookupKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLHashtableLookupKernel(const CLHashtableLookupKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLHashtableLookupKernel &operator=(const CLHashtableLookupKernel &) = delete; - - /** - * @brief Construct a CLHashtableLookupKernel object by using default move constructor - * @param[in] CLHashtableLookupKernel object to move - * */ - CLHashtableLookupKernel(CLHashtableLookupKernel &&) = default; - - /** - * @brief Move assignment operator - * @param[in] CLHashtableLookupKernel object to move - * */ - CLHashtableLookupKernel &operator=(CLHashtableLookupKernel &&) = default; - - /** - * @brief Destruct this object - * */ - ~CLHashtableLookupKernel() = default; - - /** - * @brief Set the input and output of the kernel - * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. - * @param[in] keys Keys 1D tensor. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits Hits 1D tensor. 
A boolean tensor that indicates whether the lookup hits - * (True) or not (False). Data types supported: U8/QASYMM8 - * @return N/A - */ - void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input, - ICLTensor *output, ICLTensor *hits); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLHashtableLookupKernel - * @param[in] lookups The lookups tensor info. Data types supported: S32. - * @param[in] keys The keys tensor info. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input The input tensor info. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output The output tensor. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits The hits tensor info. A boolean tensor that indicates whether the lookup - * hits - * (True) or not (False). Data types supported: U8/QASYMM8 - * @return a status - */ - static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). 
- * @param[in,out] queue Command queue on which to enqueue the kernel. - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_lookups{nullptr}; /**< Lookups tensor */ - const ICLTensor *_keys{nullptr}; /**< Keys tensor */ - const ICLTensor *_input{nullptr}; /**< Source tensor */ - ICLTensor *_output{nullptr}; /**< Destination tensor */ - ICLTensor *_hits{nullptr}; /**< Hits tensor */ - std::unique_ptr<CLTensor> _lookup_indices{nullptr}; /**< Lookup indices tensor */ -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h deleted file mode 100644 index f57e799ad..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__ -#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for performing an instance normalization */ -class CLInstanceNormalizationLayerKernelEx : public ICLKernel -{ -public: - /** Constructor */ - CLInstanceNormalizationLayerKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLInstanceNormalizationLayerKernelEx(const CLInstanceNormalizationLayerKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLInstanceNormalizationLayerKernelEx & - operator=(const CLInstanceNormalizationLayerKernelEx &) = delete; - /** Default Move Constructor. 
*/ - CLInstanceNormalizationLayerKernelEx(CLInstanceNormalizationLayerKernelEx &&) = default; - /** Default move assignment operator */ - CLInstanceNormalizationLayerKernelEx & - operator=(CLInstanceNormalizationLayerKernelEx &&) = default; - /** Default destructor */ - ~CLInstanceNormalizationLayerKernelEx() = default; - - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: - * NCHW - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p - * input. - * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults - * to nullptr - * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. Defaults - * to nullptr - * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 - */ - void configure(ICLTensor *input, ICLTensor *output, ICLTensor *gamma = nullptr, - ICLTensor *beta = nullptr, float epsilon = 1e-12f); - - /** Static function to check if given info will lead to a valid configuration of @ref - * CLInstanceNormalizationLayerEx. - * - * @param[in] input Source tensor info. In case of @p output tensor = nullptr this tensor will - * store the result of the normalization. - * Data types supported: F16/F32. Data layout supported: NHWC, NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p - * input. - * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults to - * nullptr - * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. Defaults to - * nullptr - * @param[in] epsilon (Optional) Lower bound value for the normalization. 
Defaults to 1e-12 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr, - float epsilon = 1e-12f); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - ICLTensor *_gamma; - ICLTensor *_beta; - float _epsilon; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h deleted file mode 100644 index 90e8b5705..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__ -#define __ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface to multiply scale factor kernel. */ -class CLMultiplyScaleFactorKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMultiplyScaleFactorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMultiplyScaleFactorKernel(const CLMultiplyScaleFactorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMultiplyScaleFactorKernel &operator=(const CLMultiplyScaleFactorKernel &) = delete; - /** Default Move Constructor. 
*/ - CLMultiplyScaleFactorKernel(CLMultiplyScaleFactorKernel &&) = default; - /** Default move assignment operator */ - CLMultiplyScaleFactorKernel &operator=(CLMultiplyScaleFactorKernel &&) = default; - /** Default destructor */ - ~CLMultiplyScaleFactorKernel() = default; - /** Set input, output tensors. - * - * @param[in] input Source tensor. Data type supported: S32. - * @param[in] scale_factor Scale tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: Same as @p scale_factor. - * @param[in] multiplier Additional scale value. - */ - void configure(const ICLTensor *input, const ICLTensor *scale_factor, ICLTensor *output, - float multiplier = 1.f); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLMultiplyScaleFactorKernel - * - * @param[in] input Input tensor info. Data types supported: S32. - * @param[in] scale_factor Scale tensor. Data type supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p scale_factor. - * @note Unlike configure(), no multiplier is passed to this check. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *scale_factor, - const ITensorInfo *output); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). 
- * @param[in,out] queue Command queue on which to enqueue the kernel. - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_scale_factor; - ICLTensor *_output; - float _multiplier; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLMULTIPLYSCALEFACTORKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h deleted file mode 100644 index fa383c0d0..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__ -#define __ARM_COMPUTE_CLNEGKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a negation operation on tensor*/ -class CLNegKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLNegKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLNegKernel(const CLNegKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLNegKernel &operator=(const CLNegKernel &) = delete; - /** Allow instances of this class to be moved */ - CLNegKernel(CLNegKernel &&) = default; - /** Allow instances of this class to be moved */ - CLNegKernel &operator=(CLNegKernel &&) = default; - /** Initialize the kernel's input, output. 
- * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - */ - void configure(const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLNEGKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLOneHotKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLOneHotKernel.h deleted file mode 100644 index a512057b9..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLOneHotKernel.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_CLONEHOTKERNEL_H__ -#define __ARM_COMPUTE_CLONEHOTKERNEL_H__ -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" -namespace arm_compute -{ -class ICLTensor; -/** Interface for the kernel to perform one-hot encoding*/ -class CLOneHotKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLOneHotKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLOneHotKernel(const CLOneHotKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLOneHotKernel &operator=(const CLOneHotKernel &) = delete; - /** Allow instances of this class to be moved */ - CLOneHotKernel(CLOneHotKernel &&) = default; - /** Allow instances of this class to be moved */ - CLOneHotKernel &operator=(CLOneHotKernel &&) = default; - /** Default destructor */ - ~CLOneHotKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[out] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] depth The depth of the one hot dimension. - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * value must be in range [-indices.rank , indices.rank) - */ - void configure(const ICLTensor *indices, const ICLTensor *on_value, const ICLTensor *off_value, - ICLTensor *output, int depth, int axis = -1); - /** Initialise the kernel's inputs and output already initialized to off_value - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. 
Must be one of the - * following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[out] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] depth The depth of the one hot dimension. - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * value must be in range [-indices.rank , indices.rank) - */ - void configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output, int depth, - int axis = -1); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLOneHotKernel - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[in] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] depth The depth of the one hot dimension. - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * value must be in range [-indices.rank , indices.rank) - * - * @return a status - */ - static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value, - const ITensorInfo *off_value, const ITensorInfo *output, int depth, - int axis = -1); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLOneHotKernel without off_value - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] output Destination tensor. 
Data type supported: Same as @p on_value - * @param[in] depth The depth of the one hot dimension. - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * value must be in range [-indices.rank , indices.rank) - * - * @return a status - */ - static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value, - const ITensorInfo *output, int depth, int axis = -1); - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - /** Initialise the kernel's inputs and outputs internally - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[out] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] depth The depth of the one hot dimension. - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. 
- * value must be in range [-indices.rank , indices.rank) - */ - void configure_common(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output, - int depth, int axis); - -private: - const ICLTensor *_indices; /**< Indices tensor */ - const ICLTensor *_on_value; /**< On value tensor */ - const ICLTensor *_off_value; /**< Off value tensor */ - ICLTensor *_output; /**< Destination tensor */ - bool _is_off_value_memset; /**< Whether off_value is zero */ -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLONEHOTKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h deleted file mode 100644 index 4e1b56cba..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__ -#define __ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the quantization layer kernel. - * - * @note The implementation supports only 2D input tensors. - */ -class CLQuantizationSymmetricKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLQuantizationSymmetricKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQuantizationSymmetricKernel(const CLQuantizationSymmetricKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQuantizationSymmetricKernel &operator=(const CLQuantizationSymmetricKernel &) = delete; - /** Default Move Constructor. 
*/ - CLQuantizationSymmetricKernel(CLQuantizationSymmetricKernel &&) = default; - /** Default move assignment operator */ - CLQuantizationSymmetricKernel &operator=(CLQuantizationSymmetricKernel &&) = default; - /** Default destructor */ - ~CLQuantizationSymmetricKernel() = default; - /** Set the input, output. - * - * @param[in] input Source tensor. Data types supported: F32/F16. - * @param[in] scale_factor Scale tensor of @p output. Data type supported: Same as @p input. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: - * S8. - * - * @note Output auto initialization is not supported by this kernel - */ - void configure(const ICLTensor *input, const ICLTensor *scale_factor, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLQuantizationSymmetricKernel - * - * @param[in] input Input tensor info. Data types supported: F32/F16. - * @param[in] scale_factor Scale tensor of @p output. Data type supported: Same as @p input. - * @param[in] output Destination tensor info with the same dimensions of input. Data types - * supported: S8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *scale_factor, - const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_scale_factor; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLQUANTIZATIONSYMMETRICKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h deleted file mode 100644 index 9b8a239d3..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/** - * @file CLReduceOperationKernel.h - * @brief This file defines CLReduceOperationKernel class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ -#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define interface for the reduce operation kernel - */ -class CLReduceOperationKernel : public ICLKernel -{ -public: - /** - * @brief Default constructor - */ - CLReduceOperationKernel(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLReduceOperationKernel(const CLReduceOperationKernel &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLReduceOperationKernel &operator=(const CLReduceOperationKernel &) = delete; - /** - * @brief Allow instances of this class to be moved - */ - CLReduceOperationKernel(CLReduceOperationKernel &&) = default; - /** - * @brief Allow instances of this class to be moved - */ - CLReduceOperationKernel &operator=(CLReduceOperationKernel &&) = default; - /** - * @brief Default destructor - */ - ~CLReduceOperationKernel() = default; - - /** - * @brief Set the input and output tensors. - * @param[in] input Source tensor. Data types supported: U8/S32/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. - * @param[in] op Reduce operation to perform. - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, - ReduceOperation op); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLReduceOperationKernel. - * @param[in] input Source tensor info. Data types supported: U8/S32/F32. 
- * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. - * @param[in] op Reduce operation to perform. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis, - ReduceOperation op); - - /* - * @brief Run CLReduceOperationKernel op - * @param[in] window Window to be used for in_slice - * @param[in] queue CLQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - uint32_t _axis; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h deleted file mode 100644 index 4d4478ece..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__ -#define __ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to perform min max search on a 3D tensor. 
- */ -class CLScaleFactorSymm8Kernel : public ICLKernel -{ -public: - /** Default constructor */ - CLScaleFactorSymm8Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLScaleFactorSymm8Kernel(const CLScaleFactorSymm8Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLScaleFactorSymm8Kernel &operator=(const CLScaleFactorSymm8Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLScaleFactorSymm8Kernel(CLScaleFactorSymm8Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLScaleFactorSymm8Kernel &operator=(CLScaleFactorSymm8Kernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor with 2 dimensions. The first dimension will be interpreted as - * batches. Data types supported: F32. - * @param[out] output Output tensor with shape [batches] which stores the scale values for each 2D - * input tensor. - * The dimensions over the first must match the batched dimensions of the input - * tensor. Data types supported: F32. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLScaleFactorSymm8Kernel - * - * @param[in] input Input tensor info. Data types supported: F32. - * @param[in] output Output tensor info with shape [batches] which stores the scale values for - * each 2D input tensor. - * The dimensions over the first must match the batched dimensions of the input - * tensor. Data types supported: F32. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - /** Resets global minimum and maximum - * - * @param[in,out] queue Command queue on which to map and unmap the min_max tensor - */ - void reset(cl::CommandQueue &queue); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLSCALEFACTORSYMM8KERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h deleted file mode 100644 index aa4a14812..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h +++ /dev/null @@ -1,680 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/** - * @file CLTopKV2Kernel.h - * @brief This file defines classes for TopKV2Kernel - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__ -#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -// these parameters can be changed -#define _ITEMS 16 // number of items in a group -#define _GROUPS 4 // the number of virtual processors is _ITEMS * _GROUPS -#define _HISTOSPLIT (_ITEMS * _GROUPS / 2) // number of splits of the histogram -#define PERMUT // store the final permutation -//////////////////////////////////////////////////////// - -// Disable GPU implementation -// TODO Enable GPU implementation with verification, or remove code -// Invalid result on GPU -#if 0 -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define CLTopKV2Single - */ -class CLTopKV2Single : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2Single(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied - */ - CLTopKV2Single(const CLTopKV2Single &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied - * @return Reference of this instance - */ - CLTopKV2Single &operator=(const CLTopKV2Single &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved - */ - CLTopKV2Single(CLTopKV2Single &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved - * @return Reference of this instance - */ - CLTopKV2Single &operator=(CLTopKV2Single &&) = default; - - /** - * @brief Initialise kernel with params - * @param[in] input An input tensor - * @param[in] topk_values Values of the top k predictions - * @param[in] topk_indices Indices of the top k predictions - * @param[in] indices Indices - * @param[in] temp_stack Temp stack - * @param[in] k K of the top k predictions - * @param[in] n Number times to quick-sort - * return N/A - */ - void configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices, - cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n); - - /* - * @brief Run CLTopKV2Single op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_topk_values; - ICLTensor *_topk_indices; -}; - -/** - * @brief Class to define CLTopKV2Init - */ -class CLTopKV2Init : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2Init(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied - */ - CLTopKV2Init(const CLTopKV2Init &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied - * @return Reference of this instance - */ - CLTopKV2Init &operator=(const CLTopKV2Init &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved - */ - CLTopKV2Init(CLTopKV2Init &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved - * @return Reference of this instance - */ - CLTopKV2Init &operator=(CLTopKV2Init &&) = default; - - /** - * @brief Initialise kernel with params - * @param[in] input An input tensor - * @param[in] in_key_buf Buffer of input key - * @param[in] in_ind_buf Buffer of input index - * @param[in] n Number times to quick-sort - * return N/A - */ - void configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, int n); - - /* - * @brief Run CLTopKV2Init op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; -}; - -/** - * @brief Class to define CLRadixSortHistogram - */ -class CLRadixSortHistogram : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortHistogram(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied - */ - CLRadixSortHistogram(const CLRadixSortHistogram &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied - * @return Reference of this instance - */ - CLRadixSortHistogram &operator=(const CLRadixSortHistogram &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved - */ - CLRadixSortHistogram(CLRadixSortHistogram &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved - * @return Reference of this instance - */ - CLRadixSortHistogram &operator=(CLRadixSortHistogram &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] hist_buf Buffer of histogram - * @param[in] bits Number of bits to be used for radix sort - * @param[in] n Integer number size to sort - * return N/A - */ - void configure(cl::Buffer *hist_buf, int bits, int n); - - /** - * @brief Set pass - * @param[in] pass Passes made of in radix sort algorithm - * @param[in] in_key_buf Buffer of input key - * return N/A - */ - void setPass(int pass, cl::Buffer *in_key_buf) - { - _pass = pass; - _in_key_buf = in_key_buf; - } - - /* - * @brief Run CLRadixSortHistogram op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - int _pass; - cl::Buffer *_in_key_buf; -}; - -/** - * @brief Class to define CLRadixSortScanHistogram - */ -class CLRadixSortScanHistogram : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortScanHistogram(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied - */ - CLRadixSortScanHistogram(const CLRadixSortScanHistogram &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied - * @return Reference of this instance - */ - CLRadixSortScanHistogram &operator=(const CLRadixSortScanHistogram &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved - */ - CLRadixSortScanHistogram(CLRadixSortScanHistogram &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved - * @return Reference of this instance - */ - CLRadixSortScanHistogram &operator=(CLRadixSortScanHistogram &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] hist_buf Buffer of histogram - * @param[out] glob_sum_buf Buffer of global sum - * @param[in] bits Number of bits to be used for radix sort - * return N/A - */ - void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits); - - /* - * @brief Run CLRadixSortScanHistogram op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** - * @brief Class to define CLRadixSortGlobalScanHistogram - */ -class CLRadixSortGlobalScanHistogram : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortGlobalScanHistogram(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied - */ - CLRadixSortGlobalScanHistogram(const CLRadixSortGlobalScanHistogram &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied - * @return Reference of this instance - */ - CLRadixSortGlobalScanHistogram &operator=(const CLRadixSortGlobalScanHistogram &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved - */ - CLRadixSortGlobalScanHistogram(CLRadixSortGlobalScanHistogram &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved - * @return Reference of this instance - */ - CLRadixSortGlobalScanHistogram &operator=(CLRadixSortGlobalScanHistogram &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] glob_sum_buf Buffer of global sum - * @param[out] temp_buf Temp buffer to be used while RadixSortGlobalScanHistogram - * @param[in] bits Number of bits to be used for radix sort - * return N/A - */ - void configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, int bits); - - /* - * @brief Run CLRadixSortGlobalScanHistogram op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** - * @brief Class to define CLRadixSortPasteHistogram - */ -class CLRadixSortPasteHistogram : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortPasteHistogram(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied - */ - CLRadixSortPasteHistogram(const CLRadixSortPasteHistogram &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied - * @return Reference of this instance - */ - CLRadixSortPasteHistogram &operator=(const CLRadixSortPasteHistogram &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved - */ - CLRadixSortPasteHistogram(CLRadixSortPasteHistogram &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved - * @return Reference of this instance - */ - CLRadixSortPasteHistogram &operator=(CLRadixSortPasteHistogram &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] hist_buf Buffer of histogram - * @param[out] glob_sum_buf Buffer of global sum - * @param[in] bits Number of bits to be used for radix sort - * return N/A - */ - void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits); - - /* - * @brief Run CLRadixSortPasteHistogram op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** - * @brief Class to define CLRadixSortReorder - */ -class CLRadixSortReorder : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortReorder(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied - */ - CLRadixSortReorder(const CLRadixSortReorder &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied - * @return Reference of this instance - */ - CLRadixSortReorder &operator=(const CLRadixSortReorder &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved - */ - CLRadixSortReorder(CLRadixSortReorder &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved - * @return Reference of this instance - */ - CLRadixSortReorder &operator=(CLRadixSortReorder &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] hist_buf Buffer of histogram - * @param[in] bits Number of bits to be used for radix sort - * @param[in] n Integer number size to sort - * return N/A - */ - void configure(cl::Buffer *hist_buf, int bits, int n); - - /** - * @brief Set pass - * @param[in] pass Passes made of in radix sort algorithm - * @param[in] in_key_buf Buffer of input key - * @param[out] out_key_buf Buffer of output key - * @param[in] in_ind_buf Buffer of input index - * @param[out] out_ind_buf Buffer of output index - * return N/A - */ - void setPass(int pass, cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf, - cl::Buffer *out_ind_buf) - { - _pass = pass; - _in_key_buf = in_key_buf; - _out_key_buf = out_key_buf; - _in_ind_buf = in_ind_buf; - _out_ind_buf = out_ind_buf; - } - /* - * @brief Run CLRadixSortReorder op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - int _pass; - cl::Buffer 
*_in_key_buf; - cl::Buffer *_out_key_buf; - cl::Buffer *_in_ind_buf; - cl::Buffer *_out_ind_buf; -}; - -/** - * @brief Class to define CLTopKV2FindFirstNegative - */ -class CLTopKV2FindFirstNegative : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2FindFirstNegative(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied - */ - CLTopKV2FindFirstNegative(const CLTopKV2FindFirstNegative &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied - * @return Reference of this instance - */ - CLTopKV2FindFirstNegative &operator=(const CLTopKV2FindFirstNegative &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved - */ - CLTopKV2FindFirstNegative(CLTopKV2FindFirstNegative &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved - * @return Reference of this instance - */ - CLTopKV2FindFirstNegative &operator=(CLTopKV2FindFirstNegative &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] first_negative_idx_buf Buffer of the first negative index - * @param[in] n Number times to find - * return N/A - */ - void configure(cl::Buffer *first_negative_idx_buf, int n); - - /** - * @brief Set output buffer - * @param[out] out_key_buf Buffer of output key - * return N/A - */ - void setOutputBuffer(cl::Buffer *out_key_buf) { _out_key_buf = out_key_buf; } - - /* - * @brief Run CLTopKV2FindFirstNegative op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, 
cl::CommandQueue &queue) override; - -private: - cl::Buffer *_out_key_buf; -}; - -/** - * @brief Class to define CLTopKV2ReorderNegatives - */ -class CLTopKV2ReorderNegatives : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2ReorderNegatives(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied - */ - CLTopKV2ReorderNegatives(const CLTopKV2ReorderNegatives &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied - * @return Reference of this instance - */ - CLTopKV2ReorderNegatives &operator=(const CLTopKV2ReorderNegatives &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved - */ - CLTopKV2ReorderNegatives(CLTopKV2ReorderNegatives &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved - * @return Reference of this instance - */ - CLTopKV2ReorderNegatives &operator=(CLTopKV2ReorderNegatives &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] first_negative_idx_buf Buffer of the first negative index - * @param[in] n Number times to find - * return N/A - */ - void configure(cl::Buffer *first_negative_idx_buf, int n); - - /** - * @brief Set buffers - * @param[in] in_key_buf Buffer of input key - * @param[out] out_key_buf Buffer of output key - * @param[in] in_ind_buf Buffer of input index - * @param[out] out_ind_buf Buffer of output index - * return N/A - */ - void setBuffers(cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf, - cl::Buffer *out_ind_buf) - { - _in_key_buf = in_key_buf; - _out_key_buf = 
out_key_buf; - _in_ind_buf = in_ind_buf; - _out_ind_buf = out_ind_buf; - } - - /* - * @brief Run CLTopKV2ReorderNegatives op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - cl::Buffer *_in_key_buf; - cl::Buffer *_out_key_buf; - cl::Buffer *_in_ind_buf; - cl::Buffer *_out_ind_buf; -}; - -/** - * @brief Class to define CLTopKV2Store - */ -class CLTopKV2Store : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2Store(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied - */ - CLTopKV2Store(const CLTopKV2Store &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied - * @return Reference of this instance - */ - CLTopKV2Store &operator=(const CLTopKV2Store &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved - */ - CLTopKV2Store(CLTopKV2Store &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved - * @return Reference of this instance - */ - CLTopKV2Store &operator=(CLTopKV2Store &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] values Values tensor to store - * @param[out] indices Indices tensor to be used for store - * @param[in] k K of the top k predictions - * @param[in] n Number times to store - * return N/A - */ - void configure(ICLTensor *values, ICLTensor *indices, int k, int n); - - /** - * @brief Set buffers - * @param[out] out_key_buf Buffer of output key - * @param[out] out_ind_buf Buffer of output index - * return N/A 
- */ - void setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf); - - /* - * @brief Run CLTopKV2Store op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_values; - ICLTensor *_indices; - cl::Buffer *_out_key_buf; - cl::Buffer *_out_ind_buf; -}; - -} // namespace arm_compute -#endif // Disable GPU implementation -#endif // __ARM_COMPUTE_CLTOPKV2KERNEL_H__ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h b/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h deleted file mode 100644 index 933d8760d..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/NEElementwiseOperationFuncs.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__ -#define __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__ - -#include <arm_neon.h> - -namespace arm_compute -{ -class ITensor; -class Window; -class QuantizationInfo; -} // namespace arm_compute - -namespace arm_compute -{ - -void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - float (*scalar_func)(const float &, const float &), - int (*broadcast_func)(int, int, int, const float *, const float &, float *, - const bool), - int (*neon_func)(int, int, int, const float *, const float *, float *)); - -void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - uint8_t (*scalar_func)(const uint8_t &, const uint8_t &), - int (*broadcast_func)(int, int, int, const uint8_t *, const uint8_t &, - uint8_t *, const bool), - int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *)); -} // namespace arm_compute -#endif // __ARM_COMPUTE_NEELEMENTWISEOPERATIONFUNCS_H__ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h deleted file mode 100644 index a827f48f8..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__ -#define __ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/utils/misc/Traits.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include <arm_fp16.h> -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -namespace arm_compute -{ -class ITensor; - -/** Interface for the activation layer kernel. 
*/ -class NEActivationLayerKernelEx : public INEKernel -{ -public: - const char *name() const override { return "NEActivationLayerKernelEx"; } - /** Constructor */ - NEActivationLayerKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernelEx(const NEActivationLayerKernelEx &) = delete; - /** Default move constructor */ - NEActivationLayerKernelEx(NEActivationLayerKernelEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernelEx &operator=(const NEActivationLayerKernelEx &) = delete; - /** Default move assignment operator */ - NEActivationLayerKernelEx &operator=(NEActivationLayerKernelEx &&) = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this - * tensor will store the result - * of the activation function. Data types supported: - * QASYMM8/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] activation_info Activation layer information. - */ - void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEActivationLayerKernelEx - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor - * will store the result - * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfo &act_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using ActivationFunction = ActivationLayerInfo::ActivationFunction; - /** Common signature for all the specialised @ref NEActivationLayerKernelEx functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ActivationFunctionExecutorPtr = void (NEActivationLayerKernelEx::*)(const Window &window); - /** Function to apply an activation function on a tensor. - * - * @param[in] window Region on which to execute the kernel - */ - template <ActivationLayerInfo::ActivationFunction F, typename T> - typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type - activation(const Window &window); - /** Function to apply an activation function on a tensor. - * - * @param[in] window Region on which to execute the kernel - */ - template <ActivationLayerInfo::ActivationFunction F, typename T> - typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type - activation(const Window &window); - /** Function to apply an activation function on a tensor. 
- * - * @param[in] window Region on which to execute the kernel - */ - template <ActivationLayerInfo::ActivationFunction F, typename T> - typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type - activation(const Window &window); - -private: - ITensor *_input; - ITensor *_output; - ActivationFunctionExecutorPtr _func; - ActivationLayerInfo _act_info; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEACTIVATIONLAYERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h deleted file mode 100644 index 8c544cda8..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ -#define __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ - -#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ - -class NEBinaryLogicalOperationKernel : public NEElementwiseOperationKernel -{ -public: - /** Default destructor */ - ~NEBinaryLogicalOperationKernel() = default; - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEBinaryLogicalOperationKernel - * - * @param[in] op Binary logical operation to be executed. - * @param[in] input1 First tensor input. Data types supported: QASYMM8/U8. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Same as @p input1. 
- */ - void configure(BinaryLogicalOperation op, const ITensor *input1, const ITensor *input2, - ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEBinaryLogicalOperationKernel - * - * @param[in] op Binary logical operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a Status - */ - static Status validate(BinaryLogicalOperation op, const ITensorInfo *input1, - const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, - const ITensorInfo &output); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastBoolKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastBoolKernel.h deleted file mode 100644 index 101f6ac8e..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastBoolKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_NECASTBOOLKERNEL_H__ -#define __ARM_COMPUTE_NECASTBOOLKERNEL_H__ - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** - * @brief Class for the kernel converting boolean type - */ -class NECastBoolKernel : public INEKernel -{ -public: - const char *name() const override { return "NECastBoolKernel"; } - /** Default constructor*/ - NECastBoolKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECastBoolKernel(const NECastBoolKernel &) = delete; - /** Default move constructor */ - NECastBoolKernel(NECastBoolKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECastBoolKernel &operator=(const NECastBoolKernel &) = delete; - /** Default move assignment operator */ - NECastBoolKernel &operator=(NECastBoolKernel &&) = default; - /** Set the input and output of the kernel - * - * Valid conversions Input -> Output : - * - * - U8 -> U8, S8, U16, S16, U32, S32, F32, F16 - * - * @param[in] input The input tensor to convert. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * NECastBoolKernel - * - * @param[in] input Source tensor info. Data types supported: U8 - * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NECASTBOOLKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h deleted file mode 100644 index 88f21c96e..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ -#define __ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform EmbeddingLookup operation */ -class NEEmbeddingLookupKernel : public INEKernel -{ -public: - const char *name() const override { return "NEEmbeddingLookupKernel"; } - /** Default constructor */ - NEEmbeddingLookupKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEEmbeddingLookupKernel(const NEEmbeddingLookupKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - NEEmbeddingLookupKernel &operator=(const NEEmbeddingLookupKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEmbeddingLookupKernel(NEEmbeddingLookupKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEmbeddingLookupKernel &operator=(NEEmbeddingLookupKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] lookups Lookups are 1D tensor that values are indices into the first dimension of - * input. - */ - void configure(const ITensor *input, ITensor *output, const ITensor *lookups); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEEmbeddingLookupKernel - * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Destination tensor. Data types supported: same as @p input. - * @param[in] lookups Lookups info. Data types supported: S32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *lookups); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_lookups; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h deleted file mode 100644 index 5acfde5a8..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEGatherKernelEx.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEGATHERKERNELEX_H__ -#define __ARM_COMPUTE_NEGATHERKERNELEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform other operation on NEON */ -class NEGatherKernelEx : public INEKernel -{ -public: - /** Default constructor. */ - NEGatherKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEGatherKernelEx(const NEGatherKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEGatherKernelEx &operator=(const NEGatherKernelEx &) = delete; - /** Allow instances of this class to be moved. */ - NEGatherKernelEx(NEGatherKernelEx &&) = default; - /** Allow instances of this class to be moved. */ - NEGatherKernelEx &operator=(NEGatherKernelEx &&) = default; - /** Default detructor */ - ~NEGatherKernelEx() = default; - - /** Name of the kernel - * - * @return Kernel name - */ - const char *name() const override { return "NEGatherKernelEx"; } - /** Initialise the kernel's inputs and outputs - * - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values - * wrap around. Defaults to 0 - */ - void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEGatherKernelEx - * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. 
Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the - * following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values - * wrap around. Defaults to 0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Implementation of the gather operation for 0 axis. - * - * For gather on the 0 axis an element by element copy is performed. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window - * returned by window()) - * @param[in] info Info about executing thread and CPU. - */ - template <typename U> void gather_0_axis(const Window &window, const ThreadInfo &info); - - /** Implementation of the gather operation. - * - * For 1<=axis a row-wise copy is taking place. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window - * returned by window()) - * @param[in] info Info about executing thread and CPU. 
- */ - template <typename U> void gather_n_axis(const Window &window, const ThreadInfo &info); - - using kernel_ptr = void (NEGatherKernelEx::*)(const Window &window, const ThreadInfo &info); - - const ITensor *_input; - const ITensor *_indices; - int _axis; - size_t _indices_rank; - ITensor *_output; - kernel_ptr _func; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEGATHERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h deleted file mode 100644 index cb2a485d5..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__ -#define __ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform HashtableLookup operation */ -class NEHashtableLookupKernel : public INEKernel -{ -public: - const char *name() const override { return "NEHashtableLookupKernel"; } - /** Default constructor */ - NEHashtableLookupKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEHashtableLookupKernel(const NEHashtableLookupKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - NEHashtableLookupKernel &operator=(const NEHashtableLookupKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHashtableLookupKernel(NEHashtableLookupKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHashtableLookupKernel &operator=(NEHashtableLookupKernel &&) = default; - /** Initialize the kernel's inputs, outputs. - * - * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. Data types supported: S32 - * @param[in] keys Keys 1D tensor. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits - * (True) or not (False). Data types supported: U8/QASYMM8 - * input. - */ - void configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, ITensor *output, - ITensor *hits); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEHashtableLookupKernel - * - * @param[in] lookups The lookups tensor info. Data types supported: S32. - * @param[in] keys The keys tensor info. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input The input tensor info. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output The output tensor info. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits The hits tensor info. A boolean tensor that indicates whether the lookup - * hits (True) or not (False). 
Data types supported: U8/QASYMM8 - * - * @return a status - */ - static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_lookups; /** Lookups tensor */ - const ITensor *_keys; /** Keys tensor */ - const ITensor *_input; /** Source tensor */ - ITensor *_output; /** Destination tensor */ - ITensor *_hits; /** Hits tensor */ -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUPKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h deleted file mode 100644 index 8724cc69b..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__ -#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for performing an instance normalization */ -class NEInstanceNormalizationLayerKernelEx : public INEKernel -{ -public: - const char *name() const override { return "NEInstanceNormalizationLayerKernelEx"; } - /** Default constructor */ - NEInstanceNormalizationLayerKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEInstanceNormalizationLayerKernelEx(const NEInstanceNormalizationLayerKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEInstanceNormalizationLayerKernelEx & - operator=(const NEInstanceNormalizationLayerKernelEx &) = delete; - /** Allow instances of this class to be moved */ - NEInstanceNormalizationLayerKernelEx(NEInstanceNormalizationLayerKernelEx &&) = default; - /** Allow instances of this class to be moved */ - NEInstanceNormalizationLayerKernelEx & - operator=(NEInstanceNormalizationLayerKernelEx &&) = default; - /** Default destructor */ - ~NEInstanceNormalizationLayerKernelEx() = default; - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: - * NCHW - * In case of @p output tensor = nullptr this tensor will store the result - * of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p - * input. - * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. - * Defaults to nullptr (documented default scale: 1.0) - * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. - * Defaults to nullptr (documented default offset: 0.0) - * @param[in] epsilon (Optional) Lower bound value for the normalization. 
Defaults to 1e-12 - */ - void configure(ITensor *input, ITensor *output, ITensor *gamma = nullptr, ITensor *beta = nullptr, - float epsilon = 1e-12f); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEInstanceNormalizationLayerKernelEx. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: - * NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p - * input. - * @param[in] gamma (Optional) Tensor info of the scale applied to the normalized tensor. Defaults - * to nullptr (documented default scale: 1.0) - * @param[in] beta (Optional) Tensor info of the offset applied to the normalized tensor. - * Defaults to nullptr (documented default offset: 0.0) - * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr, - float epsilon = 1e-12f); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialized instance normalization functions - * - * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will - * store the result of the normalization. - * @param[out] output The output tensor. - * @param[in] gamma The scale tensor applied to the normalized tensor. Documented default: - * 1.0 - * @param[in] beta The offset tensor applied to the normalized tensor. Documented default: - * 0.0 - * @param[in] epsilon Lower bound value for the normalization. 
Defaults to 1e-12 - */ - using NormalizationFunction = void(ITensor *input, ITensor *output, ITensor *gamma, ITensor *beta, - float epsilon, const Window &window); - - NormalizationFunction *_func; - ITensor *_input; - ITensor *_output; - ITensor *_gamma; - ITensor *_beta; - float _epsilon; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h deleted file mode 100644 index 198b0be9d..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__ -#define __ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__ - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the multiply scale factor kernel. */ -class NEMultiplyScaleFactorKernel : public INEKernel -{ -public: - const char *name() const override { return "NEMultiplyScaleFactorKernel"; } - /** Default constructor */ - NEMultiplyScaleFactorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMultiplyScaleFactorKernel(const NEMultiplyScaleFactorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMultiplyScaleFactorKernel &operator=(const NEMultiplyScaleFactorKernel &) = delete; - /** Default Move Constructor. 
*/ - NEMultiplyScaleFactorKernel(NEMultiplyScaleFactorKernel &&) = default; - /** Default move assignment operator */ - NEMultiplyScaleFactorKernel &operator=(NEMultiplyScaleFactorKernel &&) = default; - /** Default destructor */ - ~NEMultiplyScaleFactorKernel() = default; - /** Set input, output tensors and the optional multiplier (defaults to 1.f). - * - * @param[in] input Source tensor. Data type supported: S32. - * @param[in] scale_factor Scale tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: Same as @p scale_factor. - */ - void configure(const ITensor *input, const ITensor *scale_factor, ITensor *output, - float multiplier = 1.f); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEMultiplyScaleFactorKernel - * - * @param[in] input Input tensor info. Data types supported: S32. - * @param[in] scale_factor Scale tensor info. Data type supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p scale_factor. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *scale_factor, - const ITensorInfo *output, float multiplier = 1.f); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template <typename T> void multiply(const Window &window); - -private: - const ITensor *_input; - const ITensor *_scale_factor; - ITensor *_output; - float _multiplier; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEMULTIPLYSCALEFACTORKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h deleted file mode 100644 index 99bb351bc..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_NEONEHOTKERNEL_H__ -#define __ARM_COMPUTE_NEONEHOTKERNEL_H__ -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" -namespace arm_compute -{ -// Forward declarations -class ITensor; -/** Kernel to perform the one-hot operation on NEON */ -class NEOneHotKernel : public INEKernel -{ -public: - /** Default constructor. */ - NEOneHotKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEOneHotKernel(const NEOneHotKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEOneHotKernel &operator=(const NEOneHotKernel &) = delete; - /** Allow instances of this class to be moved. */ - NEOneHotKernel(NEOneHotKernel &&) = default; - /** Allow instances of this class to be moved. */ - NEOneHotKernel &operator=(NEOneHotKernel &&) = default; - /** Default destructor */ - ~NEOneHotKernel() = default; - /** Name of the kernel - * - * @return Kernel name - */ - const char *name() const override { return "NEOneHotKernel"; } - /** Initialise the kernel's inputs and outputs - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up to - * 3. Must be one of the following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: Same - * as @p on_value - * @param[out] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. 
- * The value must be in range [-indices.rank , indices.rank) - */ - void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, - const ITensor *off_value, ITensor *output, int axis = -1); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEOneHotKernel - * - * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank: - * up to 3. Must be one of the following types: U32/S32 - * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[out] output Destination tensor info. Data type supported: Same as @p on_value - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * The value must be in range [-indices.rank , indices.rank) - * - * @return a status - */ - static Status validate(const ITensorInfo *indices, const ITensorInfo *depth, - const ITensorInfo *on_value, const ITensorInfo *off_value, - const ITensorInfo *output, int axis = -1); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Implementation of the onehot operation for 0 axis. - * - * For onehot on the 0 axis an element by element copy is performed. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window - * returned by window()) - * @param[in] info Info about executing thread and CPU. - */ - template <typename U> void onehot_0_axis(const Window &window, const ThreadInfo &info); - /** Implementation of the onehot operation. - * - * For 1<=axis a row-wise copy is taking place. 
- * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window - * returned by window()) - * @param[in] info Info about executing thread and CPU. - */ - template <typename U> void onehot_n_axis(const Window &window, const ThreadInfo &info); - using kernel_ptr = void (NEOneHotKernel::*)(const Window &window, const ThreadInfo &info); - const ITensor *_indices; - const ITensor *_depth; - const ITensor *_on_value; - const ITensor *_off_value; - int _axis; - ITensor *_output; - kernel_ptr _func; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEONEHOTKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h deleted file mode 100644 index 0b080cf73..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__ -#define __ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__ - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the symmetric quantization kernel. 
*/ -class NEQuantizationSymmetricKernel : public INEKernel -{ -public: - const char *name() const override { return "NEQuantizationSymmetricKernel"; } - /** Default constructor */ - NEQuantizationSymmetricKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQuantizationSymmetricKernel(const NEQuantizationSymmetricKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQuantizationSymmetricKernel &operator=(const NEQuantizationSymmetricKernel &) = delete; - /** Default Move Constructor. */ - NEQuantizationSymmetricKernel(NEQuantizationSymmetricKernel &&) = default; - /** Default move assignment operator */ - NEQuantizationSymmetricKernel &operator=(NEQuantizationSymmetricKernel &&) = default; - /** Default destructor */ - ~NEQuantizationSymmetricKernel() = default; - /** Set input, output tensors. - * - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor with the same dimensions of input. Data type supported: - * S8. - * @param[out] scale_factor Scale tensor of @p output. Data type supported: Same as @p input. - */ - void configure(const ITensor *input, ITensor *output, ITensor *scale_factor); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEQuantizationSymmetricKernel - * - * @param[in] input Input tensor info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: S8. - * @param[out] scale_factor Scale tensor info of @p output. Data type supported: Same as @p input. 
- * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *scale_factor); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template <typename T> void quantize(const Window &window); - -private: - const ITensor *_input; - ITensor *_output; - ITensor *_scale_factor; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEQUANTIZATIONSYMMETRICKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h deleted file mode 100644 index c9024fbb3..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__ -#define __ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a reduction operation */ -class NEReductionOperationKernelEx : public INEKernel -{ -public: - const char *name() const override { return "NEReductionOperationKernelEx"; } - /** Default constructor */ - NEReductionOperationKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReductionOperationKernelEx(const NEReductionOperationKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReductionOperationKernelEx &operator=(const NEReductionOperationKernelEx &) = delete; - /** Allow instances of this class to be moved */ - NEReductionOperationKernelEx(NEReductionOperationKernelEx &&) = default; - /** Allow instances of this class to be moved */ - NEReductionOperationKernelEx &operator=(NEReductionOperationKernelEx &&) = default; - /** Default destructor */ - ~NEReductionOperationKernelEx() = default; - - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: - * NCHW. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. - */ - void configure(const ITensor *input, ITensor *output, unsigned int axis, ReduceOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEReductionOperationKernelEx. - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts - * supported: NCHW. 
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p - * input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, - ReduceOperation op); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _reduction_axis; - ReduceOperation _op; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEREDUCTIONOPERATIONKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/TypesEx.h b/compute/ARMComputeEx/arm_compute/core/TypesEx.h deleted file mode 100644 index faba8a449..000000000 --- a/compute/ARMComputeEx/arm_compute/core/TypesEx.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_TYPESEX_H__ -#define __ARM_COMPUTE_TYPESEX_H__ - -namespace arm_compute -{ - -/** Available ArgIndex operations */ -enum class ArgOperation -{ - MAX, /**< Max */ - MIN, /**< Min */ -}; - -/** Available reduce operations */ -enum class ReduceOperation -{ - MAX, /**< Max */ - MEAN, /**< Mean */ - SUM, /**< Sum */ - MIN, /**< Min */ -}; - -/** Available binary logical operations */ -enum class BinaryLogicalOperation -{ - AND, /**< AND */ - OR, /**< OR */ -}; - -enum class ComparisonOperationEx -{ - EQUAL, /**< EQUAL */ - NOT_EQUAL, /**< NOT_EQUAL */ -}; - -enum class ElementWiseUnaryEx -{ - NEG, /**< NEG */ -}; - -enum class SubDataType -{ - NONE, /**< NONE */ - BOOL, /**< BOOL */ -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TYPESEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/UtilsEx.h b/compute/ARMComputeEx/arm_compute/core/UtilsEx.h deleted file mode 100644 index d57e8fcf5..000000000 --- a/compute/ARMComputeEx/arm_compute/core/UtilsEx.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_UTILSEX_H__ -#define __ARM_COMPUTE_UTILSEX_H__ - -#include <utility> - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ - -/** Returns expected width and height of the transpose convolution's output tensor. - * - * @note This function was copied in order to fix a bug that computed wrong output dimensions. - * - * @param[in] in_width Width of input tensor (Number of columns) - * @param[in] in_height Height of input tensor (Number of rows) - * @param[in] kernel_width Kernel width. - * @param[in] kernel_height Kernel height. - * @param[in] info Padding and stride info. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_top The number of zeros added to bottom edge of the output. NOTE(review): the parameter is named invalid_top but described as the bottom edge; confirm which edge is meant. 
- * - * @return A pair with the new width in the first position and the new height in the second. - */ -const std::pair<unsigned int, unsigned int> -transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height, - unsigned int kernel_width, unsigned int kernel_height, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_top); -} // namespace arm_compute -#endif /*__ARM_COMPUTE_UTILSEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h deleted file mode 100644 index 1e69f0912..000000000 --- a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ -#define __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/core/Utils.h" - -#include "arm_compute/core/utils/helpers/tensor_transform.h" - -#include <cmath> - -namespace arm_compute -{ -namespace misc -{ -namespace shape_calculator -{ - -/** Calculate the upsampled output shape used for transpose convolution - * - * @param[in] input Input tensor info - * @param[in] weights Weights tensor shape - * @param[in] info Padding and stride info - * @param[in] out_dims Output shape dimensions - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
- * @param[out] pad_left Padding on left - * @param[out] pad_right Padding on right - * @param[out] pad_top Padding on top - * @param[out] pad_bottom Padding on bottom - * - * @return the calculated shape - */ -inline TensorShape compute_transposeconv_upsampled_shape( - const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info, - std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right, - unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right, - unsigned int &pad_top, unsigned int &pad_bottom) -{ - unsigned int sx = info.stride().first; - unsigned int sy = info.stride().second; - const DataLayout data_layout = input.data_layout(); - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - // Find the upsampled dimensions - // transpose conv out: - // tconv_out + pad = 1 + (in - 1) * stride + invalid - // tconv_out = 1 + (in - 1) * stride + invalid - pad - // upsample out: - // upsample_out = 1 + (in - 1) * stride - unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1; - unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1; - - // Find the padding needed for the convolution with stride 1 in order to match output shape - // upsample+pad out: - // upsample_out + pad = tconv_out + kernel - 1 - // pad = tconv_out + kernel - 1 - upsample_out - unsigned int padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1); - unsigned int pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1); - out_x += padx; - out_y += pady; - - unsigned int padx_all_except_invallid = padx + info.pad_left() + info.pad_right() - invalid_right; - unsigned int pady_all_except_invallid = - pady + info.pad_top() + info.pad_bottom() - invalid_bottom; - pad_left = (padx_all_except_invallid + 1) / 2 - info.pad_left(); - pad_right = pady_all_except_invallid / 2 - info.pad_right() 
+ invalid_right; - pad_top = (padx_all_except_invallid + 1) / 2 - info.pad_top(); - pad_bottom = pady_all_except_invallid / 2 - info.pad_bottom() + invalid_bottom; - - TensorShape scale_out_shape(input.tensor_shape()); - scale_out_shape.set(idx_w, out_x); - scale_out_shape.set(idx_h, out_y); - - return scale_out_shape; -} - -/** Calculate the output shape of the transpose convolution layer - * - * @param[in] out_dims Output x and y shape dimensions - * @param[in] input Input tensor info - * @param[in] weights Weights tensor shape - * - * @return the calculated shape - */ -inline TensorShape -compute_transposeconv_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, - const ITensorInfo &input, const ITensorInfo &weights) -{ - const TensorShape input_shape{input.tensor_shape()}; - const TensorShape weights_shape{weights.tensor_shape()}; - - const DataLayout data_layout = input.data_layout(); - const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int channel_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - - TensorShape out_shape{input_shape}; - out_shape.set(width_idx, out_dims.first); - out_shape.set(height_idx, out_dims.second); - out_shape.set(channel_idx, weights_shape[batch_idx]); - return out_shape; -} - -/** Calculate the depth to space output shape of a tensor - * - * @param[in] input Input tensor info - * @param[in] block Block shape value - * - * @return the calculated shape - */ -inline TensorShape compute_depth_to_space_shape_ex(const ITensorInfo *input, int block) -{ - ARM_COMPUTE_ERROR_ON(block < 2); - - const DataLayout data_layout = input->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - 
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_channel = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - TensorShape output_shape{input->tensor_shape()}; - output_shape.set(idx_width, input->dimension(idx_width) * block); - output_shape.set(idx_height, input->dimension(idx_height) * block); - output_shape.set(idx_channel, input->dimension(idx_channel) / (block * block)); - - return output_shape; -} - -/** Calculate the space to batch output shape of a tensor - * - * @param[in] input Input tensor info - * @param[in] block_shape Block shape value - * - * @return the calculated shape - */ -inline TensorShape compute_space_to_depth_shape_ex(const ITensorInfo *input, int32_t block_shape) -{ - ARM_COMPUTE_ERROR_ON(block_shape < 2); - TensorShape output_shape{input->tensor_shape()}; - - const DataLayout data_layout = input->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_shape); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_shape); - output_shape.set(idx_depth, input->tensor_shape()[idx_depth] / (block_shape * block_shape)); - - return output_shape; -} - -/** Calculate the gather output shape of a tensor - * - * @param[in] input_shape Input tensor shape - * @param[in] indices_shape Indices tensor shape - * @param[in] actual_axis The axis to be gathered - * - * @return the calculated shape - */ -inline TensorShape compute_gather_shape_ex(const TensorShape &input_shape, - const TensorShape &indices_shape, uint32_t actual_axis) -{ - ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 3); - 
ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() + indices_shape.num_dimensions() - 1 > 4); - ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions()); - - TensorShape output_shape = input_shape; - if (indices_shape.num_dimensions() == 1) - { - output_shape[actual_axis] = indices_shape[0]; - } - else if (indices_shape.num_dimensions() > 1) - { - output_shape.shift_right(indices_shape.num_dimensions() - 1); - - for (uint32_t i = 0, o = 0; o < output_shape.num_dimensions(); ++o, ++i) - { - if (o == actual_axis) - { - ++i; - for (uint32_t in = 0; in < indices_shape.num_dimensions(); ++in, ++o) - { - output_shape[o] = indices_shape[in]; - } - } - else - { - output_shape[o] = input_shape[i]; - } - } - } - return output_shape; -} - -/** Calculate the gather output shape of a tensor - * - * @param[in] input_shape Input tensor shape - * @param[in] indices_shape Indices tensor shape - * @param[in] actual_axis The axis to be gathered - * - * @return the calculated shape - */ -inline TensorShape compute_onehot_shape_ex(const TensorShape &indices_shape, uint32_t depth, - uint32_t actual_axis) -{ - ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 3); - ARM_COMPUTE_ERROR_ON(actual_axis > indices_shape.num_dimensions()); - - TensorShape output_shape; - output_shape.set(actual_axis, depth); - - unsigned int i_shift = 0; - for (unsigned int i = 0; i < indices_shape.num_dimensions(); ++i) - { - if (i == actual_axis) - { - i_shift++; - } - output_shape.set(i + i_shift, indices_shape[i]); - } - - return output_shape; -} - -} // namespace shape_calculator -} // namespace misc -} // namespace arm_compute - -#endif // __ARM_COMPUTE_MISC_SHAPE_CALCULATOR_EX_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h deleted file mode 100644 index 484ebfd0b..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h +++ 
/dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__ -#define __ARM_COMPUTE_CLFUNCTIONSEX_H__ - -#include <arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h> -#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h> -#include <arm_compute/runtime/CL/functions/CLCastBool.h> -#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h> -#include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h> -#include <arm_compute/runtime/CL/functions/CLGatherEx.h> -#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h> -#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h> -#include <arm_compute/runtime/CL/functions/CLNeg.h> -#include <arm_compute/runtime/CL/functions/CLOneHot.h> -#include <arm_compute/runtime/CL/functions/CLReduceOperation.h> -#include <arm_compute/runtime/CL/functions/CLSplitVEx.h> -#include <arm_compute/runtime/CL/functions/CLTopKV2.h> -#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h> - -#endif // __ARM_COMPUTE_CLFUNCTIONSEX_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h deleted file mode 100644 index b1ee52bf9..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h +++ 
/dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ -#define __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ - -#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h" -#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" - -namespace arm_compute -{ -class ITensorInfo; -class ICLTensor; - -/** Function to calculate the index of the minimum or maximum values in a - * tensor based on an axis. - * - * @note The default data type for an uninitialized output tensor is - * signed 32-bit integer (S32). It is the user's responsibility to check - * that the results do not overflow because the indices are computed - * in unsigned 32-bit (U32). - */ -class CLArgMinMaxLayerEx : public IFunction -{ -public: - /** Default Constructor. - * - * @param[in] memory_manager (Optional) Memory manager. - */ - CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Set the input and output tensors. - * - * @param[in] input Input source tensor. Data types supported: QASYMM8/F16/F32. - * @param[in] axis Axis to find max/min index. - * @param[out] output Output source tensor. Data types supported: U32/S32. - * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX, - * ARG_IDX_MIN - */ - void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLArgMinMaxLayerEx - * - * @param[in] input Input source tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] axis Axis to find max/min index. - * @param[in] output Output source tensor info. Data types supported: U32/S32. - * @param[in] op Reduction operation to perform. 
Operations supported: ARG_IDX_MAX, - * ARG_IDX_MIN - * - * @return a status - */ - static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output, - const ReductionOperation &op); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::vector<CLTensor> _results_vector; - CLTensor _not_reshaped_output; - std::vector<CLArgMinMaxLayerKernelEx> _reduction_kernels_vector; - CLReshapeLayerKernel _reshape_kernel; - unsigned int _num_of_stages; - unsigned int _reduction_axis; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h deleted file mode 100644 index 88a9b00ec..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLBINARYLOGICALOP_H__ -#define __ARM_COMPUTE_CLBINARYLOGICALOP_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLBinaryLogicalOp : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input1 Source tensor1. Data types supported: U8, QASYMM8. - * @param[in] input2 Source tensor2. Data types supported: U8 QASYMM8. - * @param[out] output Output tensor. Data types supported: U8, QASYMM8. 
- */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - BinaryLogicalOperation op); -}; - -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLBINARYLOGICALOP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h deleted file mode 100644 index d6150684a..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLCastBool.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLCastBool class - */ - -#ifndef ARM_COMPUTE_CLCASTBOOL_H -#define ARM_COMPUTE_CLCASTBOOL_H - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLCastBoolKernel. - * This converts the boolean input tensor to the output tensor's type. - */ -class CLCastBool : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's input and output - * @param[in] input Input tensor. Data types supported: U8 - * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/F16/F32. - */ - void configure(ICLTensor *input, ICLTensor *output); -}; -} -#endif /* ARM_COMPUTE_CLCASTBOOL_H */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h deleted file mode 100644 index 409eaf593..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ -#define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ - -#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h" -#include "arm_compute/runtime/CL/functions/CLReverse.h" -#include "arm_compute/runtime/CL/functions/CLTranspose.h" - -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" - -#include <memory> - -namespace arm_compute -{ -class ICLTensor; -/** Function to run the deconvolution layer. - * - * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input - * depending on the stride and pad info and then perform a 1x1 - * convolution pass. Input stride defines how many zeroes we should put between each element of the - * input and pad is the amount of padding. - * - * The relation between input to output is as follows: - * \f[ - * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x - * \f] - * \f[ - * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y - * \f] - * - * where: - * width_input is the size of the first input dimension. - * height_input is the size of the second input dimension. - * width_output is the size of the first output dimension. - * height_output is the size of the second output dimension. - * kernel_x and kernel_y are the convolution sizes in x and y. - * stride_x and stride_y is the input stride of the first and second dimension. - * - * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. - * Therefore, it will be necessary to use the weights in the - * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse. 
- * - * This function calls the following OpenCL kernels/functions: - * - * -# @ref CLDeconvolutionLayerUpsample - * -# @ref CLConvolutionLayer - * - * And the following CPP kernels: - * -# @ref CLReverse - * - */ -class CLDirectTransposeConvLayer : public IFunction -{ -public: - /** Constructor */ - CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete; - /** Default move constructor */ - CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete; - /** Default move assignment operator */ - CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default; - /** Set the input, weights, biases and output tensors. - * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. - * Data type supported: Should match @p input data type, except for - * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[out] output Output tensor. The output has the same number of dimensions as the - * @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this - * is decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. - * - */ - void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, - const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom, - const WeightsInfo &weights_info = WeightsInfo()); - /** Set the input, weights, biases and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and - * an optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. - * Data type supported: Should match @p input data type, except for - * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[out] output Output tensor. The output has the same number of dimensions as - * the @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, - * this is decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. 
- * - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, - const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info, - unsigned int invalid_right, unsigned int invalid_bottom, - const WeightsInfo &weights_info = WeightsInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLDirectTransposeConvLayer - * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. - * Data type supported: Should match @p input data type, except for input - * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[in] output Output tensor info. The output has the same number of dimensions as the - * @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info, - unsigned int invalid_right, unsigned int invalid_bottom, - const WeightsInfo &weights_info = WeightsInfo()); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - CLDeconvolutionLayerUpsample _scale_f; - CLConvolutionLayer _conv_f; - CLReverse _flip_weights; - - CLTensor _scaled_output; - ICLTensor *_original_weights; - CLTensor _weights_flipped; - CLTensor _flip_axis; - - bool _is_prepared; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h deleted file mode 100644 index fbee7e40e..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLEmbeddingLookup.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLEmbeddingLookup class - */ - -#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ -#define __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include <vector> - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to perform EmbeddingLookup operation - */ -class CLEmbeddingLookup : public ICLSimpleFunction -{ -public: - /** - * @brief Set the input and output tensors. - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. 
- * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); -}; -} -#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h deleted file mode 100644 index f3266f688..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ -#define __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h" -#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h" -#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" - -namespace arm_compute -{ -/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls - * the following kernels: - * - * -# @ref CLTransposeKernel - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. - */ -class CLFullyConnectedHybridLayerReshapeWeights : public ICLSimpleFunction -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: - * S8. - * @param[out] output Destination tensor which stores the transposed input tensor. Data type - * supported: Same as @p input. 
- */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLFullyConnectedHybridLayerReshapeWeights - * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: - * S8. - * @param[in] output Destination tensor which stores the transposed input tensor. Data type - * supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following - * OpenCL kernels: - * - * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref CLFullyConnectedHybridLayerReshapeWeights (if @p are_weights_reshaped is set to false - * and transpose_weights is set to true ) (called once) - * -# @ref CLGEMMLowpMatrixMultiplyCore (if quantized symmetric) - * -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr) - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. - */ -class CLFullyConnectedHybridLayer : public IFunction -{ -public: - /** Constructor */ - CLFullyConnectedHybridLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFullyConnectedHybridLayer(const CLFullyConnectedHybridLayer &) = delete; - /** Default move constructor */ - CLFullyConnectedHybridLayer(CLFullyConnectedHybridLayer &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFullyConnectedHybridLayer &operator=(const CLFullyConnectedHybridLayer &) = delete; - /** Default move assignment operator */ - CLFullyConnectedHybridLayer &operator=(CLFullyConnectedHybridLayer &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 
Data type supported: F16/F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: S8. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix - * multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the - * function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. - * @param[in] fc_info (Optional) Fully connected layer additional info - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, - ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLFullyConnectedHybridLayer - * - * @param[in] input Source tensor info. Data type supported: F16/F32. - * @param[in] weights Weights tensor info. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: S8. - * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor info. 
Its shape should be equal to the output of a - * matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the - * function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. - * @param[in] fc_info (Optional) Fully connected layer additional info - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - - // Inherited methods override - void run() override; - void prepare() override; - -private: - void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, - bool retain_internal_weights); - - MemoryGroup _memory_group; - CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel; - CLScaleFactorSymm8Kernel _scale_factor_kernel; - CLQuantizationSymmetricKernel _quant_input_kernel; - CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - CLMultiplyScaleFactorKernel _multiply_scale_kernel; - CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to - // add bias in - // CLFullyConnectedHybridLayer - CLTensor _reshape_weights_output; - CLTensor _quantized_input; - CLTensor _scale_factor; - CLTensor _gemmlowp_output; - bool _are_weights_reshaped; - bool _accumulate_biases; - bool _is_prepared; - const ICLTensor *_original_weights; -}; -} -#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h deleted file mode 100644 index e65a646dc..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - 
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__ -#define __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" -#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" -#include "arm_compute/runtime/CL/functions/CLGEMM.h" -#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" -#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" -#include "arm_compute/runtime/IWeightsManager.h" -#include "arm_compute/runtime/MemoryGroup.h" - -namespace arm_compute -{ -/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls - * the following kernels: - * - * -# @ref CLTransposeKernel - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. - */ -class CLFullyConnectedLayerReshapeWeightsEx : public ICLSimpleFunction -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: - * QASYMM8/F16/F32. - * @param[out] output Destination tensor which stores the transposed input tensor. Data type - * supported: Same as @p input. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLFullyConnectedLayerReshapeWeightsEx - * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: - * QASYMM8/F16/F32. - * @param[in] output Destination tensor which stores the transposed input tensor. Data type - * supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -namespace weights_transformations -{ -/** Basic function to manage the reshape weights generated from @ref - * CLFullyConnectedLayerReshapeWeightsEx */ -class CLFullyConnectedLayerReshapeWeightsExManaged : public ITransformWeights -{ -public: - // Inherited method override - void run() override - { - _output.allocator()->allocate(); - _func.run(); - _reshape_run = true; - } - - // Inherited method override - void release() override { _output.allocator()->free(); } - - // Inherited method override - ICLTensor *get_weights() override { return &_output; } - - // Inherited method override - uint32_t uid() override { return _uid; } - - /** Configures the @ref CLFullyConnectedLayerReshapeWeightsEx function - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. - */ - void configure(const ICLTensor *input) { _func.configure(input, &_output); } - -private: - static constexpr uint32_t _uid = 0x0; - CLTensor _output{}; - CLFullyConnectedLayerReshapeWeightsEx _func{}; -}; -} // namespace weights_transformations - -/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following - * OpenCL kernels: - * - * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref CLFullyConnectedLayerReshapeWeightsEx (if @p are_weights_reshaped is set to false and - * transpose_weights is set to true ) (called once) - * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized - * asymmetric) - * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref - * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is - * not equal to nullptr) - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 
- */ -class CLFullyConnectedLayerEx : public IFunction -{ -public: - /** Constructor */ - CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr, - IWeightsManager *weights_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFullyConnectedLayerEx(const CLFullyConnectedLayerEx &) = delete; - /** Default move constructor */ - CLFullyConnectedLayerEx(CLFullyConnectedLayerEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFullyConnectedLayerEx &operator=(const CLFullyConnectedLayerEx &) = delete; - /** Default move assignment operator */ - CLFullyConnectedLayerEx &operator=(CLFullyConnectedLayerEx &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix - * multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the - * function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. 
- * @param[in] fc_info (Optional) Fully connected layer additional info - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, - ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLFullyConnectedLayerEx - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor info. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. - * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor info. Its shape should be equal to the output of a - * matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the - * function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. 
- * @param[in] fc_info (Optional) Fully connected layer additional info - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - - // Inherited methods override - void run() override; - void prepare() override; - -private: - void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, - ICLTensor *output, const FullyConnectedLayerInfo &fc_info); - void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, - ICLTensor *output, const FullyConnectedLayerInfo &fc_info); - void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, - ICLTensor *output, const FullyConnectedLayerInfo &fc_info); - - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - CLConvertFullyConnectedWeights _convert_weights; - weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed; - weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged - _reshape_weights_managed_function; - CLFlattenLayer _flatten_layer; - CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function; - CLGEMM _mm_gemm; - CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - CLTensor _flatten_output; - CLTensor _converted_weights_output; - CLTensor _reshape_weights_output; - bool _are_weights_converted; - bool _are_weights_reshaped; - bool _is_fc_after_conv; - bool _is_quantized; - bool _is_prepared; - const ICLTensor *_original_weights; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h deleted file mode 100644 index 289ab167f..000000000 --- 
a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file       CLFullyConnectedReshapingLayer.h - * @brief      This file contains CLFullyConnectedReshapingLayer class - * @ingroup    COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__ -#define __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__ - -#include <arm_compute/runtime/CL/CLTensor.h> -#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> -#include <arm_compute/runtime/IMemoryManager.h> - -namespace arm_compute -{ -/** - * @brief Class to run FullyConnected Layer after reshaping input tensor - */ -class CLFullyConnectedReshapingLayer : public arm_compute::IFunction -{ -public: - enum class KernelType - { - GENERAL, //< General FC - PREPROCESSED_WEIGHTS //< Weights are constants so it can be preprocessed - }; - -public: - CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr) - : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, - _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false) - { - // DO NOTHING - } - -public: - /** - * @brief Configure the layer - * @param[in] input The source tensor - * @param[in] weights The tensor that is filled with weight 
values - * @param[in] biases The tensor that is filled with biase values - * @param[in] output The destination tensor - * @param[in] needs_reshape Whether it needs to be reshaped or not - * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true. - * @return N/A - */ - void configure(const arm_compute::ICLTensor *input, const arm_compute::ICLTensor *weights, - const arm_compute::ICLTensor *biases, arm_compute::ICLTensor *output, - bool needs_reshape, const arm_compute::TensorShape &reshape, - KernelType kernel_type); - -public: - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run(void) override; - /** - * @brief Prepare the operation - * @return N/A - */ - void prepare(void) override; - -private: - const arm_compute::ICLTensor *_input; - const arm_compute::ICLTensor *_weights; - const arm_compute::ICLTensor *_biases; - arm_compute::ICLTensor *_output; - - // buffer for reshaping input tensor - arm_compute::CLTensor _cl_buffer; - -private: - std::shared_ptr<IMemoryManager> _memory_manager; - std::unique_ptr<arm_compute::IFunction> _cl_fc; - CLReshapeLayer _cl_reshape; - bool _needs_reshape; -}; -} // namespace arm_compute - -#endif // __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h deleted file mode 100644 index b01ec4255..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLGatherEx.h - * @brief This file contains CLGatherEx class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CLGATHEREX_H__ -#define __ARM_COMPUTE_CLGATHEREX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to to run @ref CLGatherKernel. 
- */ -class CLGatherEx : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's inputs, output and convertion policy. - * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[in] indices An indexes tensor. Data types supported: S32. - * @param[out] output The output tensor, Data types supported: same as @p input. - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 - * @return N/A - */ - void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); - - /** - * @brief Static function to check if given info will lead to a valid configuration - * of @ref CLGatherEx - * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[in] indices An indexes tensor. Data types supported: S32. - * @param[out] output The output tensor, Data types supported: same as @p input. - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis = 0); -}; -} -#endif /*__ARM_COMPUTE_CLGATHEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h deleted file mode 100644 index 6618f5aa4..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/** - * @file CLHashtableLookup.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLHashtableLookup class - */ - -#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUP_H__ -#define __ARM_COMPUTE_CLHASHTABLELOOKUP_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include <vector> - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to perform HashtableLookup operation - */ -class CLHashtableLookup : public ICLSimpleFunction -{ -public: - /** - * @brief Set the input and output tensors. - * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. - * @param[in] keys Keys 1D tensor. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits - * (True) or not (False). Data types supported: U8/QASYMM8 - * @return N/A - */ - void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *intput, - ICLTensor *output, ICLTensor *hits); -}; -} -#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h deleted file mode 100644 index 887e7aaa5..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__ -#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to perform a Instance normalization. 
- * - * This function runs the following kernels: - * -# @ref CLInstanceNormalizationLayerKernelEx - */ -class CLInstanceNormalizationLayerEx : public ICLSimpleFunction -{ -public: - /** Default constructor */ - CLInstanceNormalizationLayerEx(); - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will - * store the result of the normalization. - * Data types supported: F16/F32. Data layout supported: NHWC, NCHW - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p - * input. - * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults - * to nullptr - * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. Defaults - * to nullptr - * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 - */ - void configure(ICLTensor *input, ICLTensor *output, ICLTensor *gamma = nullptr, - ICLTensor *beta = nullptr, float epsilon = 1e-12f); - - /** Static function to check if given info will lead to a valid configuration of @ref - * CLInstanceNormalizationLayerEx. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: - * NHWC, NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p - * input. - * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults to - * nullptr - * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. Defaults to - * nullptr - * @param[in] epsilon (Optional) Lower bound value for the normalization. 
Defaults to 1e-12 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr, - float epsilon = 1e-12f); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h deleted file mode 100644 index 8ec9aa307..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLNEG_H__ -#define __ARM_COMPUTE_CLNEG_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLNeg : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input Source tensor. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(ICLTensor *input, ICLTensor *output); -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLNEG_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h deleted file mode 100644 index 2bbfca821..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_CLONEHOT_H__ -#define __ARM_COMPUTE_CLONEHOT_H__ -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" -#include "arm_compute/core/CL/kernels/CLOneHotKernel.h" -#include "arm_compute/runtime/IFunction.h" -namespace arm_compute -{ -class ICLTensor; -/** Basic function to run @ref CLOneHotKernel */ -class CLOneHot : public IFunction -{ -public: - /** Constructor */ - CLOneHot(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLOneHot(const CLOneHot &) = delete; - /** Default move constructor */ - CLOneHot(CLOneHot &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLOneHot &operator=(const CLOneHot &) = delete; - /** Default move assignment operator */ - CLOneHot &operator=(CLOneHot &&) = default; - /** Initialise the kernel's inputs and outputs - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[out] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] depth The depth of the one hot dimension. - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * value must be in range [-indices.rank , indices.rank) - */ - void configure(const ICLTensor *indices, const ICLTensor *on_value, const ICLTensor *off_value, - ICLTensor *output, int depth, int axis = -1); - /** Initialise the kernel's inputs and outputs with off_value being constant - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. 
Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[out] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] off_value The PixelValue for off value. Data type supported: Same as @p on_value - * @param[in] depth The depth of the one hot dimension. - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * value must be in range [-indices.rank , indices.rank) - */ - void configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output, - PixelValue off_value, int depth, int axis = -1); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLOneHotKernel - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[in] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] depth The depth of the one hot dimension. - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. 
- * value must be in range [-indices.rank , indices.rank) - * - * @return a status - */ - static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value, - const ITensorInfo *off_value, const ITensorInfo *output, int depth, - int axis = -1); - - // Inherited methods overridden: - void run() override; - -private: - CLMemsetKernel _memset_kernel; /**< Memset kernel */ - CLOneHotKernel _onehot_kernel; /**< OneHot kernel */ - bool _has_to_memset; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLONEHOT_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h deleted file mode 100644 index 7dba84b12..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/** - * @file CLReduceOperation.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLReduceOperation class - */ - -#ifndef __ARM_COMPUTE_CLREDUCEOPERATION_H__ -#define __ARM_COMPUTE_CLREDUCEOPERATION_H__ - -#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" -#include "arm_compute/core/TypesEx.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to perform ReduceOperation - */ -class CLReduceOperation : public IFunction -{ -public: - /** - * @brief Construct a new ReduceOperation object - */ - CLReduceOperation(std::shared_ptr<IMemoryManager> memory_manager); - - /** - * @brief Set the input and output tensors. - * @param[in] input Source tensor. Data types supported: U8/S32/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[in] op Reduce operation to perform. - * @return N/A - */ - void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis, - bool keep_dims, ReduceOperation op); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLReduceOperation. - * @param[in] input Source tensor info. Data types supported: U8/S32/F32 - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p - * input. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. 
- * @param[in] op Reduce operation to perform. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const std::set<uint32_t> &axis, bool keep_dims, const ReduceOperation &op); - - /** - * @brief Run the OpenCL kernel for this operation - * @return N/A - */ - void run() override; - -private: - MemoryGroup _memory_group; - ICLTensor *_input; - ICLTensor *_output; - std::set<uint32_t> _axis; - bool _keep_dims; - - std::unique_ptr<CLTensor[]> _interm_tensors{nullptr}; - std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr}; - CLReshapeLayer _reshape; -}; -} -#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h deleted file mode 100644 index bb741d98d..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_CLSPLITVEX__ -#define __ARM_COMPUTE_CLSPLITVEX__ - -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/CL/functions/CLSlice.h" -#include "arm_compute/core/Types.h" -#include <vector> -#include <memory> - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLSplitVKernel */ -class CLSplitVEx : public IFunction -{ -public: - /** Default constructor */ - CLSplitVEx(); - /** Configure the split CL kernel - * - * @param[in] input The input tensor to split. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] size_splits A 1-D tensor containing the number of tensor values per split - * @param[out] outputs A vector containing the output tensor. 
Data types supported: Same as @p - * input - * The output tensors should match the input tensor dimensions for all - * shape dimensions apart - * from the split dimension. - * @param[in] split_dim Integer value representing the input tensor dimension along which to - * split - * @param[in] num_splits Number of splits - */ - void configure(const ICLTensor *input, const ICLTensor *size_splits, uint32_t split_dim, - const std::vector<ICLTensor *> &outputs, unsigned int num_splits); - - void run() override; - -private: - const ICLTensor *_input; - const ICLTensor *_size_splits; - std::vector<ICLTensor *> _outputs; - unsigned int _num_splits; - std::vector<CLSlice> _slice_functions; -}; -} -#endif /* __ARM_COMPUTE_CLSPLITVEX__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h deleted file mode 100644 index e301a5152..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLTopKV2.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLTopKV2 class - */ -#ifndef __ARM_COMPUTE_CLTOPK_V2_H__ -#define __ARM_COMPUTE_CLTOPK_V2_H__ - -#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h" - -#include "arm_compute/runtime/IFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to execute TopKV2 operation. 
- */ -class CLTopKV2 : public IFunction -{ -public: - /** - * @brief Construct a new CLTopKV2 object - */ - CLTopKV2(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLTopKV2(const CLTopKV2 &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLTopKV2 &operator=(const CLTopKV2 &) = delete; - - /** - * @brief Construct a new CLTopKV2 object by using copy constructor - * @param[in] CLTopKV2 object to move - */ - CLTopKV2(CLTopKV2 &&) = default; - - /** - * @brief Assign a CLTopKV2 object. - * @param[in] CLTopKV2 object to assign. This object will be moved. - */ - CLTopKV2 &operator=(CLTopKV2 &&) = default; - - /** - * @brief Initialise the kernel's inputs and outputs. - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[in] k The value of `k`. - * @param[out] values Top k values. Data types supported: S32 if input type is U8/S16, F32 if - * input type is F32. - * @param[out] indices Indices related to top k values. Data types supported: S32 if input type - * is U8/S16, F32 if input type is F32. - * @return N/A - */ - void configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices, - int total_bits = 32, int bits = 4); - - /** - * @brief Run the kernels contained in the function - * Depending on the value of the following environment variables it works differently: - * - If the value of environment variable "ACL_TOPKV2" == "GPU_SINGLE", - * quick sort on GPU is used. - * - If the value of environment variable "ACL_TOPKV2" == ""GPU"", - * radix sort on GPU is used. 
- * - For other value, TopKV2 runs on CPU - * @return N/A - */ - void run() override; - -private: - void run_on_cpu(); - void run_on_gpu(); - void run_on_gpu_single_quicksort(); - - uint32_t _k; - uint32_t _total_bits; - uint32_t _bits; - uint32_t _radix; - uint32_t _hist_buf_size; - uint32_t _glob_sum_buf_size; - uint32_t _n; - - ICLTensor *_input; - ICLTensor *_values; - ICLTensor *_indices; - - cl::Buffer _qs_idx_buf; - cl::Buffer _qs_temp_buf; - cl::Buffer _hist_buf; - cl::Buffer _glob_sum_buf; - cl::Buffer _temp_buf; - cl::Buffer _first_negative_idx_buf; - cl::Buffer _in_key_buf; - cl::Buffer _out_key_buf; - cl::Buffer _in_ind_buf; - cl::Buffer _out_ind_buf; - - cl::Buffer *_p_in_key_buf; - cl::Buffer *_p_out_key_buf; - cl::Buffer *_p_in_ind_buf; - cl::Buffer *_p_out_ind_buf; -// Disable GPU implementation -// TODO Enable GPU implementation with verification, or remove code -// Invalid result on GPU -#if 0 - CLTopKV2Single _qs_kernel; - CLTopKV2Init _init_kernel; - CLRadixSortHistogram _hist_kernel; - CLRadixSortScanHistogram _scan_hist_kernel; - CLRadixSortGlobalScanHistogram _glob_scan_hist_kernel; - CLRadixSortPasteHistogram _paste_hist_kernel; - CLRadixSortReorder _reorder_kernel; - CLTopKV2FindFirstNegative _find_first_negative_kernel; - CLTopKV2ReorderNegatives _reorder_negatives_kernel; - CLTopKV2Store _store_kernel; -#endif -}; -} -#endif // __ARM_COMPUTE_CLTOPK_V2_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h deleted file mode 100644 index 5fb102e47..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ -#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ - -#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h" -#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" - -#include <memory> - -namespace arm_compute -{ -/** Basic function to compute the deconvolution layer. This function calls the following OpenCL - * kernels/functions: - * - * -# @ref CLGEMMDeconvolutionLayer - * -# @ref CLDirectTransposeConvLayer - */ -class CLTransposeConvLayer : public IFunction -{ -public: - /** Default constructor */ - CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - - /** Set the input, weights, biases and output tensors. - * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same - * as @p input. - * @param[out] output Output tensor. The output has the same number of dimensions as the - * @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this - * is described in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. 
- * - */ - void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, - const PadStrideInfo &deconv_info, unsigned int invalid_right, - unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); - /** Set the input, weights, biases and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and - * an optional 4th dimension for batch of inputs. Data types supported: - * QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: - * Same as @p input. - * @param[out] output Output tensor. The output has the same number of dimensions as - * the @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, - * this is described in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. - * - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, - const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info, - unsigned int invalid_right, unsigned int invalid_bottom, - const WeightsInfo &weights_info = WeightsInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLTransposeConvLayer - * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. 
Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as - * @p input. - * @param[in] output Output tensor info. The output has the same number of dimensions as the - * @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is - * described in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, ITensorInfo *output, - const PadStrideInfo &deconv_info, unsigned int invalid_right, - unsigned int invalid_bottom, - const WeightsInfo &weights_info = WeightsInfo()); - - static DeconvolutionMethod - get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, ITensorInfo *output, - const PadStrideInfo &deconv_info, unsigned int invalid_right, - unsigned int invalid_bottom, const WeightsInfo &weights_info); - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - std::shared_ptr<IMemoryManager> _memory_manager; - std::unique_ptr<IFunction> _function; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h deleted file mode 100644 index d47b1fe62..000000000 --- 
a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__ -#define __ARM_COMPUTE_NEFUNCTIONSEX_H__ - -#include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h> -#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h> -#include <arm_compute/runtime/NEON/functions/NECastBool.h> -#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h> -#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h> -#include <arm_compute/runtime/NEON/functions/NEGatherEx.h> -#include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h> -#include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h> -#include <arm_compute/runtime/NEON/functions/NEOneHot.h> -#include <arm_compute/runtime/NEON/functions/NEReduceSum.h> -#include <arm_compute/runtime/NEON/functions/NEReduceOperation.h> -#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h> - -#endif // __ARM_COMPUTE_NEFUNCTIONSEX_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h deleted file mode 100644 index 6156c84f8..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h +++ 
/dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ -#define __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to run @ref NEActivationLayerKernelEx - * - * @note The function simulates an activation layer with the specified activation function. - */ -class NEActivationLayerEx : public INESimpleFunctionNoBorder -{ -public: - /** Constructor - * - * @param[in] ctx Runtime context to be used by the function - */ - NEActivationLayerEx(IRuntimeContext *ctx = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerEx(const NEActivationLayerEx &) = delete; - /** Default move constructor */ - NEActivationLayerEx(NEActivationLayerEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerEx &operator=(const NEActivationLayerEx &) = delete; - /** Default move assignment operator */ - NEActivationLayerEx &operator=(NEActivationLayerEx &&) = default; - /** [NEActivationLayerEx snippet] **/ - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will - * be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this - * tensor will store the result - * of the activation function. Data types supported: - * QASYMM8/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] activation_info Activation layer parameters. 
- */ - void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); - /** [NEActivationLayerEx snippet] **/ - /** Static function to check if given info will lead to a valid configuration of @ref - * NEActivationLayerEx - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor - * will store the result - * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfo &act_info); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h deleted file mode 100644 index 026d30098..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__ -#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__ - -#include "arm_compute/core/TypesEx.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEBinaryLogicalOperationKernel. - * - * @note The tensor data type for the inputs must be QASYMM8/U8. - * @note The function performs a binary logical operation between two tensors. - */ -class NEBinaryLogicalOperation : public INESimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/U8. - * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[out] output Output tensor. Data types supported: Same as @p input1. 
- * @param[in] op Binary Logical Operation to be performed. - */ - void configure(ITensor *input1, ITensor *input2, ITensor *output, BinaryLogicalOperation op); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEBinaryLogicalOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] op Binary Logical Operation to be performed. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, BinaryLogicalOperation op); -}; - -/** Basic function to run @ref NEBinaryLogicalOperationKernel - * - * @note The tensor data type for the inputs must be QASYMM8/U8. - * @note The function performs a binary logical operation between two tensors. - */ -template <BinaryLogicalOperation op> class NEBinaryLogicalOperationStatic : public INESimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/U8 - * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(ITensor *input1, ITensor *input2, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEBinaryLogicalOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8 - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output); -}; - -/** Basic function to run equal comparison. */ -using NELogicalAnd = NEBinaryLogicalOperationStatic<BinaryLogicalOperation::AND>; -/** Basic function to run not equal comparison. */ -using NELogicalOr = NEBinaryLogicalOperationStatic<BinaryLogicalOperation::OR>; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h deleted file mode 100644 index c8b08af8d..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NECASTBOOL_H__ -#define __ARM_COMPUTE_NECASTBOOL_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** - * @brief Class to run @ref NECastBoolKernel. - */ -class NECastBool : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination - * - * Valid conversions Input -> Output : - * - * - U8 -> U8, S8, U16, S16, U32, S32, F32, F16 - * - * @param[in] input The input tensor to convert. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NECastBool - * - * @param[in] input Source tensor info. 
Data types supported: U8. - * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NECASTBOOL_H__*/ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h deleted file mode 100644 index 63f7714aa..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file NEEmbeddingLookup.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::NEEmbeddingLookup class - */ - -#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ -#define __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include <vector> - -namespace arm_compute -{ -class ITensor; - -/** - * @brief Class to perform EmbeddingLookup operation - */ -class NEEmbeddingLookup : public INESimpleFunctionNoBorder -{ -public: - /** - * @brief Set the input and output tensors. - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. Data types supported: S32. - * @return N/A - */ - void configure(const ITensor *input, ITensor *output, const ITensor *lookups); - /** Static function to check if given info will lead to a valid configuration of @ref NECopy - * - * @param[in] input Source tensor info. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * @param[in] output Lookups tensor info. Data types supported: S32. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *lookups); -}; -} -#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h deleted file mode 100644 index 56548a479..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDHYBRIDLAYER_H__ -#define __ARM_COMPUTE_NEFULLYCONNECTEDHYBRIDLAYER_H__ - -#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls - * the following kernels: - * - * -# @ref NETransposeKernel - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. - */ -class NEFullyConnectedHybridLayerReshapeWeights : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: - * QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEFullyConnectedHybridLayerReshapeWeights - * - * @param[in] input Weights tensor info. 
The weights must be 2 dimensional. Data types supported: - * QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -/** Basic function to compute a Fully Connected layer on NEON. This function calls the following - * NEON kernels: - * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref NEFullyConnectedHybridLayerReshapeWeights (if @p are_weights_reshaped is set to false - * and transpose_weights is set to true ) (called once) - * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized - * asymmetric) - * -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref - * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is - * not equal to nullptr) - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. - */ -class NEFullyConnectedHybridLayer : public IFunction -{ -public: - /** Constructor */ - NEFullyConnectedHybridLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFullyConnectedHybridLayer(const NEFullyConnectedHybridLayer &) = delete; - /** Default move constructor */ - NEFullyConnectedHybridLayer(NEFullyConnectedHybridLayer &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFullyConnectedHybridLayer &operator=(const NEFullyConnectedHybridLayer &) = delete; - /** Default move assignment operator */ - NEFullyConnectedHybridLayer &operator=(NEFullyConnectedHybridLayer &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. 
- * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: S8. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix - * multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the - * function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. - * @param[in] fc_info (Optional) Fully connected layer additional info - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, - ITensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEFullyConnectedHybridLayer - * - * @param[in] input Source tensor info. Data type supported: F16/F32. - * @param[in] weights Weights tensor info. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: S8. - * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor info. 
Its shape should be equal to the output of a - * matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the - * function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. - * @param[in] fc_info (Optional) Fully connected layer additional info - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - - // Inherited methods override - void run() override; - void prepare() override; - -private: - void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output); - - MemoryGroup _memory_group; - NEFullyConnectedHybridLayerReshapeWeights _reshape_weights_function; - NEQuantizationSymmetricKernel _quant_input_kernel; - NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - NEMultiplyScaleFactorKernel _multiply_scale_kernel; - NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; - Tensor _reshape_weights_output; - Tensor _quantized_input; - Tensor _scale_factor; - Tensor _gemmlowp_output; - const ITensor *_original_weights; - bool _are_weights_reshaped; - bool _accumulate_biases; - bool _is_prepared; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDHYBRIDLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h deleted file mode 100644 index 8f98f220a..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__ -#define __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" -#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -/** Basic function to compute a Fully Connected layer on NEON. This function calls the following - * NEON kernels: - * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) - * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and - * transpose_weights is set to true ) (called once) - * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized - * asymmetric) - * -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref - * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is - * not equal to nullptr) - * - * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. - * @note The difference from NEFullyConnectedLayer is that this class supports weights as input - * with performance loss. 
- */ -class NEFullyConnectedLayerEx : public IFunction -{ -public: - /** Constructor */ - NEFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFullyConnectedLayerEx(const NEFullyConnectedLayerEx &) = delete; - /** Default move constructor */ - NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFullyConnectedLayerEx &operator=(const NEFullyConnectedLayerEx &) = delete; - /** Default move assignment operator */ - NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. - * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix - * multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the - * function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. 
- * @param[in] fc_info (Optional) Fully connected layer additional info - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, - ITensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEFullyConnectedLayerEx - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor info. The weights must be 2 dimensional. - * If this function is called after a Convolution Layer, the (transposed) - * weights will have as many rows as the product of the first 3 input's dimensions. - * If it is called after another FullyConnected Layer, the (transposed) - * weights will have as many rows as the input's first dimension. - * Data type supported: Same as @p input. - * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input. - * @param[out] output Destination tensor info. Its shape should be equal to the output of a - * matrix multiplication between: - * - The output of im2col on the input and the (transposed) 2D weights, if the - * function is called after a Convolution Layer - * - The input tensor and the (transposed) 2D weights, if the function is - * called after another FullyConnected Layer. - * Data type supported: Same as @p input. 
- * @param[in] fc_info (Optional) Fully connected layer additional info - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); - - // Inherited methods override - void run() override; - void prepare() override; - -private: - void configure_fc_fc(const ITensor *input, const ITensor *weights, ITensor *output); - void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output); - void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output); - - MemoryGroup _memory_group; - NEFlattenLayerKernel _flatten_kernel; - NEConvertFullyConnectedWeights _convert_weights; - NEFullyConnectedLayerReshapeWeights _reshape_weights_function; - NEGEMM _mm_gemm; - NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; - NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; - Tensor _flatten_output; - Tensor _gemmlowp_output; - Tensor _converted_weights_output; - Tensor _reshape_weights_output; - const ITensor *_original_weights; - bool _are_weights_converted; - bool _are_weights_reshaped; - bool _is_fc_after_conv; - bool _accumulate_biases; - bool _is_quantized; - bool _is_prepared; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h deleted file mode 100644 index 18cb61bf9..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file       NEFullyConnectedReshapingLayer.h - * @brief      This file contains NEFullyConnectedReshapingLayer class - * @ingroup    COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__ -#define __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__ - -#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h> -#include <arm_compute/runtime/IMemoryManager.h> -#include <arm_compute/runtime/Tensor.h> - -namespace arm_compute -{ -/** - * @brief Class to run FullyConnected Layer after reshaping input tensor - */ -class NEFullyConnectedReshapingLayer : public arm_compute::IFunction -{ -public: - enum class KernelType - { - GENERAL, //< General FC - PREPROCESSED_WEIGHTS //< Weights are constants so it can be preprocessed - }; - -public: - NEFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr) - : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr), - _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false) - { - // DO NOTHING - } - -public: - /** - * @brief Configure the layer - * @param[in] input The source tensor - * @param[in] weights The tensor that is filled with weight values - * @param[in] biases The tensor that is filled with biase values - * @param[in] output The destination tensor - * @param[in] needs_reshape Whether it needs to be 
reshaped or not - * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true. - * @param[in] kernel_type The kernel type for actual FullyConnected layer - * @return N/A - */ - void configure(const arm_compute::ITensor *input, const arm_compute::ITensor *weights, - const arm_compute::ITensor *biases, arm_compute::ITensor *output, - bool needs_reshape, const arm_compute::TensorShape &reshape, - KernelType kernel_type); - -public: - /** - * @brief Run the operation. Must be called after configure(). - * @return N/A - */ - void run(void) override; - /** - * @brief Prepare the operation - * @return N/A - */ - void prepare(void) override; - -private: - std::shared_ptr<IMemoryManager> _memory_manager; - const arm_compute::ITensor *_input; - const arm_compute::ITensor *_weights; - const arm_compute::ITensor *_biases; - arm_compute::ITensor *_output; - - // buffer for reshaping input tensor - arm_compute::Tensor _neon_buffer; - -private: - std::unique_ptr<arm_compute::IFunction> _neon_fc; - NEReshapeLayer _neon_reshape; - bool _needs_reshape; -}; -} // namespace arm_compute - -#endif // __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h deleted file mode 100644 index 155a1b837..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEGATHEREX_H__ -#define __ARM_COMPUTE_NEGATHEREX_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEGatherKernelEx */ -class NEGatherEx : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs and outputs - * - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 - */ - void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEGatherKernelEx - * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. 
Defaults to 0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis); -}; - -} // namespace arm_compute - -#endif /* __ARM_COMPUTE_NEGATHEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h deleted file mode 100644 index 521a05ad9..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file NEHashtableLookup.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::NEHashtableLookup class - */ - -#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUP_H__ -#define __ARM_COMPUTE_NEHASHTABLELOOKUP_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include <vector> - -namespace arm_compute -{ -class ITensor; - -/** - * @brief Class to perform HashtableLookup operation - */ -class NEHashtableLookup : public INESimpleFunctionNoBorder -{ -public: - /** - * @brief Set the input and output tensors. - * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. Data types supported: S32 - * @param[in] keys Keys 1D tensor. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits - * (True) or not (False). Data types supported: U8/QASYMM8 - * @return N/A - */ - void configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, ITensor *output, - ITensor *hits); - /** Static function to check if given info will lead to a valid configuration of @ref NECopy - * - * @param[in] lookups Lookups 1D tensor info. - * Data types supported: S32 - * @param[in] keys Keys 1D tensor info. 
keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input Source tensor info. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p - * input. - * @param[in] hits Hits 1D tensor info. A boolean tensor that indicates whether the lookup - * hits (True) or not (False). Data types supported: U8/QASYMM8 - * - * @return a status - */ - static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits); -}; -} -#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUP_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h deleted file mode 100644 index 18e813923..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ -#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ - -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform a Instance normalization. 
- * - * This function runs the following kernels: - * -# @ref NEInstanceNormalizationLayerKernelEx - */ -class NEInstanceNormalizationLayerEx : public IFunction -{ -public: - /** Constructor */ - NEInstanceNormalizationLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will - * store the result of the normalization. - * Data types supported: F16/F32. Data layout supported: NHWC, NCHW - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p - * input. - * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. - * Defaults to 1.0 - * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. - * Defaults to 0.0 - * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 - */ - void configure(ITensor *input, ITensor *output, ITensor *gamma, ITensor *beta, - float epsilon = 1e-12f); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEInstanceNormalizationLayer. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: - * NHWC, NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p - * input. - * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults - * to 1.0 - * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. - * Defaults to 0.0 - * @param[in] epsilon (Optional) Lower bound value for the normalization. 
Defaults to 1e-12 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr, - float epsilon = 1e-12f); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEInstanceNormalizationLayerKernelEx _normalization_kernel; - bool _is_nchw; - NEPermute _permute_input; - NEPermute _permute_output; - Tensor _permuted_input; - Tensor _permuted_output; -}; -} -#endif /* __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h deleted file mode 100644 index b2ea6270f..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEONEHOT_H__ -#define __ARM_COMPUTE_NEONEHOT_H__ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" -namespace arm_compute -{ -// Forward declarations -class ITensor; -/** Basic function to run @ref NEOneHotKernel */ -class NEOneHot : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs and outputs - * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up - * to 3. Must be one of the following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor. 
Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[out] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * The value must be in range [-indices.rank , indices.rank) - */ - void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, - const ITensor *off_value, ITensor *output, int axis = -1); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEOneHotKernel - * - * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank: - * up to 3. Must be one of the following types: U32/S32 - * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[out] output Destination tensor info. Data type supported: Same as @p on_value - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. 
- * The value must be in range [-indices.rank , indices.rank) - * - * @return a status - */ - static Status validate(const ITensorInfo *indices, const ITensorInfo *depth, - const ITensorInfo *on_value, const ITensorInfo *off_value, - const ITensorInfo *output, int axis = -1); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEONEHOT_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h deleted file mode 100644 index 7f764b000..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEON_REDUCE_OPERATION_H__ -#define __ARM_COMPUTE_NEON_REDUCE_OPERATION_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/TypesEx.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform reduce operation */ -class NEReduceOperation : public IFunction -{ -public: - /** Constructor */ - NEReduceOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. 
Data type supported: QASYMM8/F16/F32 - * @param[in] reduction_axis Reduction axis vector. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] op Reduce operation to perform. - */ - void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, ITensor *output, - ReduceOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEReduceOperation - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 - * @param[in] reduction_axis Reduction axis vector. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[in] output Destination tensor. Data type supported: Same as @p input - * @param[in] op Reduce operation to perform. - * - * @return A status - */ - static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output, ReduceOperation op); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::vector<NEReductionOperationEx> _reduction_kernels; - std::vector<Tensor> _reduced_outs; - NEReshapeLayer _reshape; - unsigned int _reduction_ops; - bool _keep_dims; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEON_REDUCE_OPERATION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h deleted file mode 100644 index 48b416923..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEON_REDUCE_SUM_H__ -#define __ARM_COMPUTE_NEON_REDUCE_SUM_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform reduce operation */ -class NEReduceSum : public IFunction -{ -public: - /** Constructor */ - NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 - * @param[in] reduction_axis Reduction axis vector. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[out] output Destination tensor. Data type supported: Same as @p input - */ - void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReduceSum - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 - * @param[in] reduction_axis Reduction axis vector. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[in] output Destination tensor. 
Data type supported: Same as @p input - * - * @return A status - */ - static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::vector<NEReductionOperation> _reduction_kernels; - std::vector<Tensor> _reduced_outs; - NEReshapeLayer _reshape; - unsigned int _reduction_ops; - bool _keep_dims; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEON_REDUCE_SUM_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h deleted file mode 100644 index 1693922b7..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__ -#define __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to simulate a reduction operation. This function calls the following NEON - * kernels: - * - * -# @ref NEFillBorderKernel - * -# @ref NEReductionOperationKernelEx - * - */ -class NEReductionOperationEx : public IFunction -{ -public: - /** Default constructor */ - NEReductionOperationEx(); - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. - * @param[out] output Destination tensor. 
Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. - * @param[in] op Reduction operation to perform. - */ - void configure(ITensor *input, ITensor *output, unsigned int axis, ReduceOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEReductionOperationEx. - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p - * input. - * @param[in] axis Dimension along which to reduce. - * @param[in] op Reduction operation to perform. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, - ReduceOperation op); - - // Inherited methods overridden: - void run() override; - -private: - NEReductionOperationKernelEx _reduction_kernel; - NEFillBorderKernel _fill_border_kernel; - size_t _window_split; - int _reduction_axis; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h deleted file mode 100644 index 24ff5dac9..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ -#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ - -#include "arm_compute/runtime/CPP/functions/CPPUpsample.h" -#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEReverse.h" - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -/** Function to run the deconvolution layer. 
- * - * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input - * depending on the stride and pad info and then perfrom a 1x1 - * convolution pass. Input stride defines how many zeroes we should put between each element of the - * input, pad is the amount of padding and finaly a is a user - * specified value where a < stride - 1 that increases the padding top and right of the input image. - * - * The relation between input to output is as follows: - * \f[ - * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x - * \f] - * \f[ - * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y - * \f] - * - * where - * width is the size of the first input dimension. - * height is the size of the second input dimension. - * width_output is the size of the first output dimension. - * height_output is the size of the second output dimension. - * kernel_x and kernel_y are the convolution sizes in x and y. - * stride_x and stride_y is the input stride of the first and second dimension. - * - * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. - * Therefore, it will be necessary to use the weights in the - * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse. 
- * - * This function calls the following NEON kernels/functions: - * - * -# @ref CPPUpsampleEx - * -# @ref NEConvolutionLayer - * -# @ref NEPermute - * -# @ref NEReverse - * - */ -class NETransposeConvLayer : public IFunction -{ -public: - /** Constructor */ - NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETransposeConvLayer(const NETransposeConvLayer &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete; - /** Allow instances of this class to be moved */ - NETransposeConvLayer(NETransposeConvLayer &&) = default; - /** Allow instances of this class to be moved */ - NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default; - /** Default destructor */ - virtual ~NETransposeConvLayer() = default; - - /** Set the input, weights, biases and output tensors. - * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type - * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 - * for F16 input. - * @param[out] output Output tensor. The output has the same number of dimensions as the @p - * input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
- * - */ - void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_bottom); - /** Static function to check if given info will lead to a valid configuration of @ref - * NETransposeConvLayer - * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types - * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. - * @param[in] output Output tensor info. The output has the same number of dimensions as the @p - * input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] innvalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, const ITensorInfo *output, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_bottom); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - NEConvolutionLayer _conv_f; - CPPUpsample _upsample_f; - NEReverse _flip_weights; - Tensor _scaled_output; - Tensor _weights_flipped; - Tensor _flip_axis; - const ITensor *_original_weights; - ITensor *_input; - PadStrideInfo _info; - bool _is_prepared; -}; -} // arm_compute -#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */ diff --git a/compute/ARMComputeEx/resolve_includes.py b/compute/ARMComputeEx/resolve_includes.py deleted file mode 100755 index f37c2a957..000000000 --- a/compute/ARMComputeEx/resolve_includes.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Copyright (c) 2016, 2017 ARM Limited. 
-# -# SPDX-License-Identifier: MIT -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import collections -import os.path -import re -import subprocess -import glob - - -def resolve_includes(target, source): - # File collection - FileEntry = collections.namedtuple('FileEntry', 'target_name file_contents') - - # Include pattern - pattern = re.compile("#include \"(.*)\"") - - # Get file contents - files = [] - for i in range(len(source)): - src = source[i] - dst = target[i] - f = open(src) - cts = f.read() - f.close() - contents = cts.splitlines() - entry = FileEntry(target_name=dst, file_contents=contents) - files.append((os.path.basename(src), entry)) - - # Create dictionary of tupled list - files_dict = dict(files) - - # Check for includes (can only be files in the same folder) - final_files = [] - for file in files: - done = False - tmp_file = file[1].file_contents - print(file[1].target_name) - while not done: - file_count = 0 - updated_file = [] - for line in tmp_file: - found = pattern.search(line) - if found: - include_file = found.group(1) - data = files_dict[include_file].file_contents - updated_file.extend(data) - else: - updated_file.append(line) - file_count += 1 - - # Check if all include are replaced. 
- if file_count == len(tmp_file): - done = True - - # Update temp file - tmp_file = updated_file - - # Append and prepend string literal identifiers and add expanded file to final list - tmp_file.insert(0, "R\"(\n") - tmp_file.append("\n)\"") - entry = FileEntry(target_name=file[1].target_name, file_contents=tmp_file) - final_files.append((file[0], entry)) - - # Write output files - for file in final_files: - with open(file[1].target_name, 'w+') as out_file: - out_file.write("\n".join(file[1].file_contents)) - - -# Generate embed files -cl_files = glob.glob('src/core/CL/cl_kernels/*.cl') -cl_files += glob.glob('src/core/CL/cl_kernels/*.h') - -# DEBUG: print cl files -print("cl_files:") -print(cl_files) - -embed_files = [f + "embed" for f in cl_files] -print("embed_files:") -print(embed_files) - -resolve_includes(embed_files, cl_files) diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp deleted file mode 100644 index 81d0cb70f..000000000 --- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ /dev/null @@ -1,369 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Utils.h" - -#include <algorithm> -#include <fstream> -#include <iostream> -#include <utility> -#include <vector> - -using namespace arm_compute; - -const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = { - // ARMComputeEx kernels - {"arg_min_max_ex_x", "arg_min_max_ex.cl"}, - {"arg_min_max_ex_y", "arg_min_max_ex.cl"}, - {"arg_min_max_ex_z", "arg_min_max_ex.cl"}, - {"arg_min_max_ex_w", "arg_min_max_ex.cl"}, - {"binary_logical_op", "binary_logical_op.cl"}, - {"cast_bool", "cast.cl"}, - {"embedding_lookup", "embedding_lookup.cl"}, - {"gather_ex", "gather_ex.cl"}, - {"gather_ex_1d", "gather_ex.cl"}, - {"gather_ex_1d_out", "gather_ex.cl"}, - {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"}, - {"hashtable_lookup", "hashtable_lookup.cl"}, - {"instance_normalization_ex", "instance_normalization_ex.cl"}, - {"multiply_scale_factor", "multiply_scale_factor.cl"}, - {"neg_tensor", "neg_tensor.cl"}, - {"one_hot", "one_hot.cl"}, - {"one_hot_only_on_value", "one_hot.cl"}, - {"quantization_symm8", "quantization_symm8.cl"}, - {"reduce_min_max", "reduce_operation.cl"}, - {"reduce_sum_mean", "reduce_operation.cl"}, - {"topkv2_init", "topkv2.cl"}, - {"topkv2_find_first_negative", "topkv2.cl"}, - {"topkv2_reorder_negatives", "topkv2.cl"}, - {"topkv2_store", "topkv2.cl"}, - {"radixsort_histogram", "topkv2_radixsort.cl"}, - {"radixsort_scanhistograms", "topkv2_radixsort.cl"}, - {"radixsort_pastehistograms", "topkv2_radixsort.cl"}, - {"radixsort_reorder", "topkv2_radixsort.cl"}, - {"topkv2_quicksort", "topkv2_quicksort.cl"}, - {"scale_factor_symm8", "scale_factor.cl"}, -}; - -const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = { -#ifdef EMBEDDED_KERNELS - { - "arg_min_max_ex.cl", -#include 
"./cl_kernels/arg_min_max_ex.clembed" - }, - { - "cast.cl", -#include "./cl_kernels/cast.clembed" - }, - { - "embedding_lookup.cl", -#include "./cl_kernels/embedding_lookup.clembed" - }, - { - "gather_ex.cl", -#include "./cl_kernels/gather_ex.clembed" - }, - { - "gemmlowp_ex.cl", -#include "./cl_kernels/gemmlowp_ex.clembed" - }, - { - "hashtable_lookup.cl", -#include "./cl_kernels/hashtable_lookup.clembed" - }, - { - "helpers.h", -#include "./cl_kernels/helpers.hembed" - }, - { - "helpers_asymm.h", -#include "./cl_kernels/helpers_asymm.hembed" - }, - { - "instance_normalization_ex.cl", -#include "./cl_kernels/instance_normalization_ex.clembed" - }, - { - "binary_logical_op.cl", -#include "./cl_kernels/binary_logical_op.clembed" - }, - { - "multiply_scale_factor.cl", -#include "./cl_kernels/multiply_scale_factor.clembed" - }, - { - "neg_tensor.cl", -#include "./cl_kernels/neg_tensor.clembed" - }, - { - "one_hot.cl", -#include "./cl_kernels/one_hot.clembed" - }, - { - "quantization_symm8.cl", -#include "./cl_kernels/quantization_symm8.clembed" - }, - { - "reduce_operation.cl", -#include "./cl_kernels/reduce_operation.clembed" - }, - { - "scale_factor.cl", -#include "./cl_kernels/scale_factor.clembed" - }, - { - "topkv2.cl", -#include "./cl_kernels/topkv2.clembed" - }, - { - "topkv2_radixsort.cl", -#include "./cl_kernels/topkv2_radixsort.clembed" - }, - { - "topkv2_quicksort.cl", -#include "./cl_kernels/topkv2_quicksort.clembed" - }, - -#endif /* EMBEDDED_KERNELS */ -}; - -CLKernelLibraryEx::CLKernelLibraryEx() - : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map() -{ - opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the - // CLKernelLibraryEx is built -} - -CLKernelLibraryEx &CLKernelLibraryEx::get() -{ - static CLKernelLibraryEx _kernel_library; - return _kernel_library; -} - -Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name, - const StringSet &build_options_set) const -{ - // Find 
which program contains the kernel - auto kernel_program_it = _kernel_program_map.find(kernel_name); - - if (_kernel_program_map.end() == kernel_program_it) - { - ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str()); - } - std::string concat_str; - - if (fp16_supported()) - { - concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 "; - } - - if (get_cl_version(_device) == CLVersion::CL20) - { - concat_str += " -cl-std=CL2.0 "; - } - else if (arm_non_uniform_workgroup_supported(_device)) - { - concat_str += " -cl-arm-non-uniform-work-group-size "; - } - else - { - ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!"); - } - - // Check if the program has been built before with same build options. - const std::string program_name = kernel_program_it->second; - const std::string build_options = stringify_set(build_options_set) + concat_str; - - const std::string built_program_name = program_name + "_" + build_options; - auto built_program_it = _built_programs_map.find(built_program_name); - - cl::Program cl_program; - - if (_built_programs_map.end() != built_program_it) - { - // If program has been built, retrieve to create kernel from it - cl_program = built_program_it->second; - } - else - { - // Get program - Program program = load_program(program_name); - - // Build program - cl_program = program.build(build_options); - - // Add built program to internal map - _built_programs_map.emplace(built_program_name, cl_program); - } - - // Create and return kernel - return Kernel(kernel_name, cl_program); -} - -void CLKernelLibraryEx::add_built_program(const std::string &built_program_name, - cl::Program program) -{ - _built_programs_map.emplace(built_program_name, program); -} - -bool CLKernelLibraryEx::fp16_supported() const { return ::fp16_supported(_device); } - -bool CLKernelLibraryEx::int64_base_atomics_supported() const -{ - return device_supports_extension(_device, "cl_khr_int64_base_atomics"); -} - -const Program 
&CLKernelLibraryEx::load_program(const std::string &program_name) const -{ - const auto program_it = _programs_map.find(program_name); - - if (program_it != _programs_map.end()) - { - return program_it->second; - } - - Program program; - -#ifdef EMBEDDED_KERNELS - const auto program_source_it = _program_source_map.find(program_name); - - if (_program_source_map.end() == program_source_it) - { - ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str()); - } - - program = Program(_context, program_name, program_source_it->second); -#else /* EMBEDDED_KERNELS */ - // Check for binary - std::string source_name = _kernel_path + program_name; - std::string binary_name = source_name + "bin"; - - if (std::ifstream(binary_name).is_open()) - { - const std::string program_binary = read_file(binary_name, true); - program = Program(_context, _device, program_name, - std::vector<unsigned char>(program_binary.begin(), program_binary.end())); - } - else if (std::ifstream(source_name).is_open()) - { - program = Program(_context, program_name, read_file(source_name, false)); - } - else - { - ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str()); - } -#endif /* EMBEDDED_KERNELS */ - - // Insert program to program map - const auto new_program = _programs_map.emplace(program_name, std::move(program)); - - return new_program.first->second; -} - -std::string CLKernelLibraryEx::stringify_set(const StringSet &s) const -{ - std::string concat_set; - -#ifndef EMBEDDED_KERNELS - concat_set += "-I" + _kernel_path + " "; -#endif /* EMBEDDED_KERNELS */ - - // Concatenate set - for (const auto &el : s) - { - concat_set += " " + el; - } - - return concat_set; -} - -std::string CLKernelLibraryEx::get_program_source(const std::string &program_name) -{ - const auto program_source_it = _program_source_map.find(program_name); - - if (program_source_it == _program_source_map.end()) - { - ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", 
program_name.c_str()); - } - - return program_source_it->second; -} - -size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) const -{ - size_t result; - - size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result); - ARM_COMPUTE_ERROR_ON_MSG( - err != 0, - "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel"); - ARM_COMPUTE_UNUSED(err); - - return result; -} - -cl::NDRange CLKernelLibraryEx::default_ndrange() const -{ - // GPUTarget _target = get_target_from_device(_device); - cl::Device device = cl::Device::getDefault(); - GPUTarget _target = get_target_from_device(device); - cl::NDRange default_range; - - switch (_target) - { - case GPUTarget::MIDGARD: - case GPUTarget::T600: - case GPUTarget::T700: - case GPUTarget::T800: - default_range = cl::NDRange(128u, 1); - break; - default: - default_range = cl::NullRange; - } - - return default_range; -} - -std::string CLKernelLibraryEx::get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); } diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl deleted file mode 100644 index 0a014d15c..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl +++ /dev/null @@ -1,565 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "helpers.h" - -#if defined(FLOAT_DATA_TYPE) -#define ISGREATER(x, y) isgreater(x, y) -#define ISLESS(x, y) isless(x, y) -#else // !FLOAT_DATA_TYPE -#if defined(WIDTH) -#define ISGREATER(x, y) (x > y) ? 1 : 0 -#define ISLESS(x, y) (x < y) ? 
1 : 0 -#else // !defined(WIDTH) -#define ISGREATER(x, y) \ - select((VEC_DATA_TYPE(DATA_TYPE_SELECT, 16))0, (VEC_DATA_TYPE(DATA_TYPE_SELECT, 16)) - 1, x > y) -#define ISLESS(x, y) \ - select((VEC_DATA_TYPE(DATA_TYPE_SELECT, 16))0, (VEC_DATA_TYPE(DATA_TYPE_SELECT, 16)) - 1, x < y) -#endif // defined(WIDTH) -#endif // defined(FLOAT_DATA_TYPE) - -#if defined(ARG_MAX) -#define CONDITION_TO_USE(x, y) ISGREATER(x, y) -#elif defined(ARG_MIN) -#define CONDITION_TO_USE(x, y) ISLESS(x, y) -#else // !(defined(ARG_MAX) || defined(ARG_MIN)) -#error "Unsupported reduction operation!" -#endif // defined(ARG_MAX) - -#if defined(DATA_TYPE_OUTPUT) && defined(DATA_TYPE_SELECT) -#if defined(WIDTH) -#if defined(ARG_MIN) -#if defined(PREV_OUTPUT) -/** Find index minimum value of a vector - * - * @param[in] input Pointer to the first value. - * - * @return index of the vector. - */ -inline DATA_TYPE_OUTPUT arg_idx_min_prev_out(__global const DATA_TYPE *input, - __global const DATA_TYPE_OUTPUT *prev_res, - const int x_idx) -{ - int end_elem = (x_idx + 1) * 16; - if (end_elem > WIDTH) - { - end_elem = WIDTH - x_idx * 16; - } - DATA_TYPE_OUTPUT res = prev_res[0]; - for (int x_v = 1; x_v < end_elem; ++x_v) - { - res = select(res, prev_res[x_v], *(input + prev_res[x_v]) < *(input + res)); - } - return res; -} -#else // !defined(PREV_OUTPUT) -/** Find index minimum value of a vector - * - * @param[in] input Pointer to the first value. - * - * @return index of the vector. 
- */ -inline DATA_TYPE_OUTPUT arg_idx_min(__global const DATA_TYPE *input, const int x_idx) -{ -#if WIDTH < 16 - DATA_TYPE_OUTPUT res = 0; - for (DATA_TYPE_OUTPUT x_v = res + 1; x_v < WIDTH; ++x_v) - { - res = select(res, x_v, *(input + x_v) < *(input + res)); - } - return res; -#else // WIDTH >= 16 - int x_elem = x_idx * 16; - const int x_goback = select(0, 16 - WIDTH % 16, x_elem + 16 > WIDTH); - x_elem -= x_goback; - - VEC_DATA_TYPE(DATA_TYPE, 16) - in = vload16(0, input - x_goback); - VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) - res = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - - VEC_DATA_TYPE(DATA_TYPE_SELECT, 8) - idx_sel = (in.s01234567 <= in.s89abcdef); - in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel); - res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8)); - - idx_sel.s0123 = (in.s0123 < in.s4567) || - (in.s0123 == in.s4567 && - CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4))); - in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123); - res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4)); - - idx_sel.s01 = - (in.s01 < in.s23) || - (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2))); - in.s01 = select(in.s23, in.s01, idx_sel.s01); - res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2)); - - idx_sel.s0 = (in.s0 < in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), DATA_TYPE_SELECT)); - res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int)); - - return res.s0 + x_elem; -#endif // WIDTH < 16 -} -#endif // defined(PREV_OUTPUT) -#endif // defined(ARG_MIN) -#if defined(ARG_MAX) -#if defined(PREV_OUTPUT) -/** Find index maximum value of a vector - * - * @param[in] input Pointer to the first value. - * - * @return index of the vector. 
- */ -inline DATA_TYPE_OUTPUT arg_idx_max_prev_out(__global const DATA_TYPE *input, - __global const DATA_TYPE_OUTPUT *prev_res, - const int x_idx) -{ - int end_elem = (x_idx + 1) * 16; - if (end_elem > WIDTH) - { - end_elem = WIDTH - x_idx * 16; - } - // Start from the first candidate index produced by the previous stage. - DATA_TYPE_OUTPUT res = prev_res[0]; - // Index of the current best candidate; kept in sync with res inside the loop. - unsigned int res_int = res; - DATA_TYPE_OUTPUT condition_check2; - for (int x_v = 1; x_v < end_elem; ++x_v) - { - int i1 = prev_res[x_v]; - // Take the candidate only if its value is strictly greater than the current best. - condition_check2 = *(input + i1) > *(input + res_int); - res = select(res, prev_res[x_v], condition_check2); - // Refresh the comparison index; without this every candidate would be compared against - // prev_res[0] only and a later, smaller value could overwrite the real maximum. - res_int = res; - } - return res; -} -#else // !defined(PREV_OUTPUT) -/** Find index maximum value of a vector - * - * @param[in] input Pointer to the first value. - * - * @return index of the vector. - */ -inline DATA_TYPE_OUTPUT arg_idx_max(__global const DATA_TYPE *input, const int x_idx) -{ -#if WIDTH < 16 - DATA_TYPE_OUTPUT res = 0; - unsigned int i1; - unsigned int i2; - DATA_TYPE_OUTPUT condition_check; - for (DATA_TYPE_OUTPUT x_v = res + 1; x_v < WIDTH; ++x_v) - { - i1 = x_v; - i2 = res; - condition_check = *(input + i1) > *(input + i2); - res = select(res, x_v, condition_check); - } - return res; -#else // WIDTH >= 16 - int x_elem = x_idx * 16; - const int x_goback = select(0, 16 - WIDTH % 16, x_elem + 16 > WIDTH); - x_elem -= x_goback; - - VEC_DATA_TYPE(DATA_TYPE, 16) - in = vload16(0, input - x_goback); - VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) - res = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - - VEC_DATA_TYPE(DATA_TYPE_SELECT, 8) - idx_sel = (in.s01234567 >= in.s89abcdef); - in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel); - res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8)); - - idx_sel.s0123 = (in.s0123 > in.s4567) || - (in.s0123 == in.s4567 && - CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4))); - in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123); - res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4)); - - idx_sel.s01 = - (in.s01 > in.s23) || - 
(in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2))); - in.s01 = select(in.s23, in.s01, idx_sel.s01); - res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2)); - - idx_sel.s0 = (in.s0 > in.s1) || (in.s0 == in.s1 && CONVERT((res.s0 < res.s1), DATA_TYPE_SELECT)); - res.s0 = select(res.s1, res.s0, CONVERT(idx_sel.s0, int)); - - return res.s0 + x_elem; -#endif // WIDTH < 16 -} -#endif // defined(PREV_OUTPUT) -#endif // defined(ARG_MAX) - -/** This kernel performs parallel reduction given an operation on x-axis. - * - * @note In case the results of previous stages are passed the flag PREV_OUTPUT has to be passed - * using -DPREV_OUTPUT - * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float - * @note The data type of the output must be passed at compile time using -DDATA_TYPE_OUTPUT: e.g. - * -DDATA_TYPE_OUTPUT=uint - * @note The arg_max flag must be passed at compile time using -DARG_MAX if we want to compute the - * ArgMax - * @note The arg_min flag must be passed at compile time using -DARG_MIN if we want to compute the - * ArgMin - * - * @param[in] src_ptr Pointer to the source tensor. Supported data - * types: S32/F16/F32 - * @param[in] src_stride_x Stride of the source tensor in X dimension - * (in bytes) - * @param[in] src_step_x src_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] src_stride_y Stride of the source tensor in Y dimension - * (in bytes) - * @param[in] src_step_y src_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] src_offset_first_element_in_bytes The offset of the first element in the - * source tensor - * @param[in] prev_res_ptr (Optional) Pointer to previous results - * tensor. 
Supported data types: U32/S32 - * @param[in] prev_res_stride_x (Optional) Stride of the output tensor in X - * dimension (in bytes) - * @param[in] prev_res_step_x (Optional) prev_res_stride_x * number of - * elements along X processed per workitem(in bytes) - * @param[in] prev_res_stride_y (Optional) Stride of the output tensor in Y - * dimension (in bytes) - * @param[in] prev_res_step_y (Optional) prev_res_stride_y * number of - * elements along Y processed per workitem(in bytes) - * @param[in] prev_res_offset_first_element_in_bytes (Optional) The offset of the first element - * in the previous results tensor - * @param[in] partial_res_ptr The local buffer to hold partial result - * values. Supported data types: U32/S32 - * @param[in] partial_res_stride_x Stride of the output tensor in X dimension - * (in bytes) - * @param[in] partial_res_step_x partial_res_stride_x * number of elements - * along X processed per workitem(in bytes) - * @param[in] partial_res_stride_y Stride of the output tensor in Y dimension - * (in bytes) - * @param[in] partial_res_step_y partial_res_stride_y * number of elements - * along Y processed per workitem(in bytes) - * @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the - * source tensor - * @param[in] local_results Local buffer for storing the partial result - */ -__kernel void arg_min_max_ex_x(IMAGE_DECLARATION(src), -#if defined(PREV_OUTPUT) - IMAGE_DECLARATION(prev_res), -#endif // defined(PREV_OUTPUT) - IMAGE_DECLARATION(partial_res), - __local DATA_TYPE_OUTPUT *local_results) -{ -#if defined(PREV_OUTPUT) - Image src = CONVERT_TO_IMAGE_STRUCT_NO_STEP(src); - Image prev_res = CONVERT_TO_IMAGE_STRUCT(prev_res); -#else // !defined(PREV_OUTPUT) - Image src = CONVERT_TO_IMAGE_STRUCT(src); -#endif // defined(PREV_OUTPUT) - Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res); - - unsigned int lsize = get_local_size(0); - unsigned int lid = get_local_id(0); - - const uint x_idx = 
get_global_id(0); - const uint y_idx = get_global_id(1); - const __global DATA_TYPE *src_in_row = - (const __global DATA_TYPE *)(src_ptr + src_offset_first_element_in_bytes + - y_idx * src_step_y); - - for (unsigned int y = 0; y < get_local_size(1); ++y) - { -#if defined(ARG_MAX) -#if defined(PREV_OUTPUT) - local_results[lid] = arg_idx_max_prev_out( - src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx); -#else // !defined(PREV_OUTPUT) - local_results[lid] = arg_idx_max((__global DATA_TYPE *)offset(&src, 0, y), x_idx); -#endif // defined(PREV_OUTPUT) -#else // defined(ARG_MIN) -#if defined(PREV_OUTPUT) - local_results[lid] = arg_idx_min_prev_out( - src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx); -#else // !defined(PREV_OUTPUT) - local_results[lid] = arg_idx_min((__global DATA_TYPE *)offset(&src, 0, y), x_idx); -#endif // defined(PREV_OUTPUT) -#endif // defined(ARG_MAX) || defined(ARG_MIN) - - barrier(CLK_LOCAL_MEM_FENCE); - - // Looking for the next highest power of 2 (maximum value of lsize is 8) - unsigned int middle = lsize - 1; - middle |= middle >> 1; - middle |= middle >> 2; - middle += 1; - // Perform parallel reduction - DATA_TYPE_OUTPUT condition_check3; - for (unsigned int i = middle; i > 0; i >>= 1) - { - if (lid < i && lid + i < lsize) - { - DATA_TYPE tmp0 = *(src_in_row + local_results[lid]); - DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]); -#if defined(ARG_MAX) - condition_check3 = - ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 < tmp1); - local_results[lid] = select(local_results[lid], local_results[lid + i], condition_check3); -#else // defined(ARG_MIN) - local_results[lid] = select( - local_results[lid], local_results[lid + i], - ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 > tmp1)); -#endif // defined(ARG_MAX) || defined(ARG_MIN) - } - barrier(CLK_LOCAL_MEM_FENCE); - } - - if (lid == 0) - { - ((__global DATA_TYPE_OUTPUT 
*)offset(&partial_res, get_group_id(0), y))[0] = local_results[0]; - } - } -} -#endif // defined(WIDTH) - -#if defined(HEIGHT) -/** This kernel performs reduction on y-axis. - * - * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. - * -DDATA_TYPE=float - * @note The data type of the output must be passed at compile time using -DDATA_TYPE_OUTPUT: e.g. - * -DDATA_TYPE_OUTPUT=uint - * @note The data type of the select results must be passed at compile time using - * -DDATA_TYPE_SELECT: e.g. -DDATA_TYPE_SELECT=int - * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128 - * - * @param[in] src_ptr Pointer to the source tensor. Supported data - * types: S32/F16/F32 - * @param[in] src_stride_x Stride of the source tensor in X dimension (in - * bytes) - * @param[in] src_step_x src_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] src_stride_y Stride of the source tensor in Y dimension (in - * bytes) - * @param[in] src_step_y src_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[in] output_ptr The local buffer to hold sumed values. 
Supported - * data types: U32/S32 - * @param[in] output_stride_x Stride of the output tensor in X dimension (in - * bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the output tensor in Y dimension (in - * bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source - * tensor - */ -__kernel void arg_min_max_ex_y(IMAGE_DECLARATION(src), IMAGE_DECLARATION(output)) -{ - Image src = CONVERT_TO_IMAGE_STRUCT(src); - Image output = CONVERT_TO_IMAGE_STRUCT(output); - - VEC_DATA_TYPE(DATA_TYPE, 16) - res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE, 16)); - - VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) - indx = 0; - for (unsigned int y = 1; y < HEIGHT; ++y) - { - VEC_DATA_TYPE(DATA_TYPE, 16) - in = - CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE, 16)); - - VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) - cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)); - indx = select(indx, y, cond_conv); - res = select(res, in, CONDITION_TO_USE(in, res)); - } - - // Store result - vstore16(indx, 0, (__global DATA_TYPE_OUTPUT *)output.ptr); -} -#endif // defined(HEIGHT) - -#if defined(DEPTH) -/** This kernel performs reduction on z-axis. - * - * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float - * @note The data type of the select results must be passed at compile time using - * -DDATA_TYPE_SELECT: e.g. -DDATA_TYPE_SELECT=int - * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128 - * - * @param[in] input_ptr Pointer to the source tensor. 
Supported data - * types: S32/F16/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[in] output_ptr The local buffer to hold sumed values. Supported - * data types: U32/S32 - * @param[in] output_stride_x Stride of the output tensor in X dimension (in - * bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the output tensor in Y dimension (in - * bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the output tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source - * tensor - */ -__kernel void arg_min_max_ex_z(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_DATA_TYPE(DATA_TYPE, 16) - res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), - VEC_DATA_TYPE(DATA_TYPE, 16)); - - VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) - indx = 0; - for (DATA_TYPE_OUTPUT z = 1; z < DEPTH; 
++z) - { - VEC_DATA_TYPE(DATA_TYPE, 16) - in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), - VEC_DATA_TYPE(DATA_TYPE, 16)); - - VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) - cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)); - indx = select(indx, z, cond_conv); - res = select(res, in, CONDITION_TO_USE(in, res)); - } - - // Store result - vstore16(indx, 0, (__global DATA_TYPE_OUTPUT *)output.ptr); -} -#endif /* defined(DEPTH) */ - -#if defined(BATCH) && defined(DEPTH) -/** This kernel performs reduction on w-axis. - * - * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float - * @note The data type of the select results must be passed at compile time using - * -DDATA_TYPE_SELECT: e.g. -DDATA_TYPE_SELECT=int - * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128 - * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128 - * - * @param[in] input_ptr Pointer to the source tensor. 
Supported data - * types: S32/F16/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] input_step_w input_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[out] output_ptr The destination tensor that receives the resulting indices. Supported - * data types: U32/S32 - * @param[in] output_stride_x Stride of the output tensor in X dimension (in - * bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the output tensor in Y dimension (in - * bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the output tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the output tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination tensor - */ -__kernel void 
arg_min_max_ex_w(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output)) -{ - Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH); - Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH); - - VEC_DATA_TYPE(DATA_TYPE, 16) - res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), - VEC_DATA_TYPE(DATA_TYPE, 16)); - - VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) - indx = 0; - for (DATA_TYPE_OUTPUT w = 1; w < BATCH; ++w) - { - VEC_DATA_TYPE(DATA_TYPE, 16) - in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), - VEC_DATA_TYPE(DATA_TYPE, 16)); - - VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) - cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)); - indx = select(indx, w, cond_conv); - res = select(res, in, CONDITION_TO_USE(in, res)); - } - - // Store result - vstore16(indx, 0, (__global DATA_TYPE_OUTPUT *)output.ptr); -} -#endif /* defined(BATCH) && defined(DEPTH) */ -#endif /* defined(DATA_TYPE_OUTPUT) && defined(DATA_TYPE_SELECT) */ diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl deleted file mode 100644 index e249663bc..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(OP_CODE) && defined(DATA_TYPE) -/** returns truth value of the two input tensors for BINARY LOGICAL OP. - * where BINARY LOGICAL OP can be AND, OR. - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=uchar - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. - * e.g. -DVEC_SIZE=16 - * @attention Operation type(code) specifying which operation to perform should be passed as - * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input1_ptr Pointer to the source tensor. 
- * Supported data types: QASYMM8 - * @param[in] input1_stride_x Stride of the source tensor in X dimension - * (in bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source tensor in Y dimension - * (in bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[in] input2_ptr Pointer to the source tensor. - * Supported data types: QASYMM8 - * @param[in] input2_stride_x Stride of the source tensor in X dimension - * (in bytes) - * @param[in] input2_step_x input2_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input2_stride_y Stride of the source tensor in Y dimension - * (in bytes) - * @param[in] input2_step_y input2_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input2_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] input2_step_z input2_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[out] output_ptr Pointer to the destination tensor. 
- * Supported data types: QASYMM8 - * @param[in] output_stride_x Stride of the destination tensor in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension - * (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination tensor - */ -__kernel void binary_logical_op(TENSOR3D_DECLARATION(input1), TENSOR3D_DECLARATION(input2), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input1 = CONVERT_TO_TENSOR3D_STRUCT(input1); - Tensor3D input2 = CONVERT_TO_TENSOR3D_STRUCT(input2); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - -#if OP_CODE == 1 // LOGICAL AND - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) && - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), - 0, (__global DATA_TYPE *)output.ptr); - -#elif OP_CODE == 2 // LOGICAL OR - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) || - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), - 0, (__global DATA_TYPE *)output.ptr); - -#else // OP NOT SUPPORTED - // Unknown OP_CODE: leave the output untouched. The statement needs its terminating - // semicolon, otherwise the kernel fails to build for every unsupported OP_CODE. - return; - -#endif -} -#endif // if defined(OP_CODE) && defined(DATA_TYPE) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl deleted file mode 100644 index 3b0a175a4..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "helpers.h" - -/** This function performs a up-scaling depth conversion for boolean type input. 
- * - * @note The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and - * -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short - * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @note The integer shift amount value need to be passed at compile time using -DSHIFT: - * e.g. -DSHIFT=7 - * - * @param[in] in_ptr Pointer to the source image. Supported data types: - * U8 - * @param[in] in_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] in_step_x in_stride_x * number of elements along X processed - * per workitem(in bytes) - * @param[in] in_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] in_step_y in_stride_y * number of elements along Y processed - * per workitem(in bytes) - * @param[in] in_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] in_step_z in_stride_z * number of elements along Z processed - * per workitem(in bytes) - * @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. 
Supported data - * types: U8/S8/U16/S16/U32/S32/F16/F32 - * @param[in] out_stride_x Stride of the destination image in X dimension (in - * bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed - * per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination image in Y dimension (in - * bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed - * per workitem(in bytes) - * @param[in] out_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] out_step_z out_stride_z * number of elements along Z processed - * per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination - * image - */ -__kernel void cast_bool(TENSOR3D_DECLARATION(in), TENSOR3D_DECLARATION(out)) -{ - // Get pixels pointer - Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(in); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load data - VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) - in_data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)in.ptr); - - VSTORE(VEC_SIZE) - (CONVERT(in_data & 1, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, - (__global DATA_TYPE_OUT *)out.ptr); -} diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl deleted file mode 100644 index 92e5dfbee..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS) -/** Perform embedding_lookup of input tensor - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. 
- * -DDATA_TYPE=short - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @attention Output tensor depth should be given as a preprocessor argument using - * -DDEPTH_OUT=depth. e.g. -DDEPTH_OUT=16 - * @attention The number of input dimensions is passed as a preprocessor argument using - * -DNUM_DIMS=size, e.g. -DNUM_DIMS=4 - * - * @param[in] input_ptr Pointer to the source tensor. Supported data - * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[in] input_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] input_step_w input_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination tensor. 
Supported - * data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination tensor - * @param[in] lookups_ptr Pointer to the lookups vector. Supported data - * types: S32 - * @param[in] lookups_stride_x Stride of the lookups vector in X dimension (in - * bytes) - * @param[in] lookups_step_x lookups_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] lookups_offset_first_element_in_bytes The offset of the first element in the lookups - * vector - */ - -__kernel void embedding_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output), - VECTOR_DECLARATION(lookups)) -{ - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, DEPTH_OUT); - - Vector lups = CONVERT_TO_VECTOR_STRUCT_NO_STEP(lookups); - - // lookup ids for based on the tensor dimensions - int lup_id[4] = {0}; - - lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) - : get_global_id(0); - lup_id[1] = (NUM_DIMS == 2) ? 
*((__global int *)vector_offset(&lups, get_global_id(1))) - : get_global_id(1); - lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2))) - : get_global_id(2) % DEPTH_OUT; - lup_id[3] = (NUM_DIMS == 4) - ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT)) - : get_global_id(2) / DEPTH_OUT; - - in.ptr += input_offset_first_element_in_bytes + lup_id[0] * input_step_x + - lup_id[1] * input_step_y + lup_id[2] * input_step_z + lup_id[3] * input_step_w; - - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, - (__global DATA_TYPE *)out.ptr); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl deleted file mode 100644 index 2236021f1..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(AXIS) && defined(INDICES_DIM) - -/** Performs the Gather operation along the chosen axis - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. - * -DDATA_TYPE=short - * @note Axis should be given as a preprocessor argument using -DAXIS=axis. e.g. -DAXIS=1 - * @attention Output tensor depth should be given as a preprocessor argument using - * -DOUTPUT_DIM_Z=size. e.g. -DOUTPUT_DIM_Z=16 - * @attention Input tensor depth should be given as a preprocessor argument using - * -DINPUT_DIM_Z=size. e.g. -DINPUT_DIM_Z=16 - * - * @param[in] input_ptr Pointer to the source tensor. 
Supported data - * types: U8/S8/U16/S16/U32/S32/F16/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per work item (in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per work item (in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per work item (in bytes) - * @param[in] input_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] input_step_w input_stride_w * number of elements along W - * processed per work item (in bytes) - * @param[in] input_offset_first_element_in_bytes Offset of the first element in the source - * tensor - * @param[in] indices_ptr Pointer to the indices tensor. Supported data - * types: S32 - * @param[in] indices_stride_x Stride of the indices tensor in X dimension (in - * bytes) - * @param[in] indices_step_x indices_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] indices_stride_y Stride of the indices tensor in Y dimension (in - * bytes) - * @param[in] indices_step_y indices_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] indices_stride_z Stride of the indices tensor in Z dimension (in - * bytes) - * @param[in] indices_step_z indices_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] indices_offset_first_element_in_bytes The offset of the first element in the - * indices tensor - * @param[out] output_ptr Pointer to the destination tensor. 
Supported - * data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per work item (in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per work item (in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension - * (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per work item (in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W dimension - * (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per work item (in bytes) - * @param[in] output_offset_first_element_in_bytes Offset of the first element in the destination - * tensor - */ -__kernel void gather_ex(TENSOR4D_DECLARATION(input), TENSOR3D_DECLARATION(indices), - TENSOR4D_DECLARATION(output)) -{ - const int px = get_global_id(0); - const int py = get_global_id(1); - const int pz = get_global_id(2) % OUTPUT_DIM_Z; - const int pw = get_global_id(2) / OUTPUT_DIM_Z; - - const Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, INPUT_DIM_Z); - const Tensor3D indices = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(indices); - Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, OUTPUT_DIM_Z); - -#if AXIS == 0 -#if INDICES_DIM == 1 - const uint index = *(__global const uint *)tensor3D_offset(&indices, px, 0, 0); - __global const uchar *input_addr = tensor4D_offset(&input, index, py, pz, pw); -#elif INDICES_DIM == 2 - const uint index = *(__global const uint *)tensor3D_offset(&indices, px, py, 0); - __global const uchar *input_addr = tensor4D_offset(&input, index, pz, pw, 0); -#elif INDICES_DIM == 3 - const uint index = *(__global const uint *)tensor3D_offset(&indices, px, py, 
pz); - __global const uchar *input_addr = tensor4D_offset(&input, index, pw, 0, 0); -#endif -#elif AXIS == 1 -#if INDICES_DIM == 1 - const uint index = *(__global const uint *)tensor3D_offset(&indices, py, 0, 0); - __global const uchar *input_addr = tensor4D_offset(&input, px, index, pz, pw); -#elif INDICES_DIM == 2 - const uint index = *(__global const uint *)tensor3D_offset(&indices, py, pz, 0); - __global const uchar *input_addr = tensor4D_offset(&input, px, index, pw, 0); -#elif INDICES_DIM == 3 - const uint index = *(__global const uint *)tensor3D_offset(&indices, py, pz, pw); - __global const uchar *input_addr = tensor4D_offset(&input, px, index, 0, 0); -#endif -#elif AXIS == 2 -#if INDICES_DIM == 1 - const uint index = *(__global const uint *)tensor3D_offset(&indices, pz, 0, 0); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, index, pw); -#elif INDICES_DIM == 2 - const uint index = *(__global const uint *)tensor3D_offset(&indices, pz, pw, 0); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, index, 0); -#endif -#elif AXIS == 3 -#if INDICES_DIM == 1 - const uint index = *(__global const uint *)tensor3D_offset(&indices, pw, 0, 0); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, index); -#endif -#endif // AXIS - - *(__global DATA_TYPE *)output.ptr = *((__global const DATA_TYPE *)input_addr); -} - -#endif // defined(DATA_TYPE) && defined(AXIS) && defined(INDICES_DIM) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl deleted file mode 100644 index 80ba73d1d..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "helpers.h" - -#if defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && \ - defined(COLS_A) -#define VECTOR_CHAR VEC_DATA_TYPE(char, NUM_ELEMS_PROCESSED_PER_THREAD_X) -#define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X) -#define VECTOR_FLOAT VEC_DATA_TYPE(float, NUM_ELEMS_PROCESSED_PER_THREAD_X) -/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B - * (src1) in case both matrices have not been reshaped - * - * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A - * - * @note In case the input or output have to be reinterpreted as a 3D tensor, the following - * information must be passed at compile time: - * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D - * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D - * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D - * tensor. - * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor - * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped - * - * @param[in] src0_ptr Pointer to the source matrix. Supported data type: - * QASYMM8 - * @param[in] src0_stride_x Stride of the source matrix in X dimension (in - * bytes) - * @param[in] src0_step_x src0_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in - * bytes) - * @param[in] src0_step_y src0_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source - * matrix - * @param[in] src1_ptr Pointer to the source matrix. 
Supported data type: - * same as @p src0_ptr - * @param[in] src1_stride_x Stride of the source matrix in X dimension (in - * bytes) - * @param[in] src1_step_x src_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in - * bytes) - * @param[in] src1_step_y src_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source - * matrix - * @param[out] dst_ptr Pointer to the destination matrix Supported data - * type: S32 - * @param[in] dst_stride_x Stride of the destination matrix in X dimension - * (in bytes) - * @param[in] dst_step_x dst_gx_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] dst_stride_y Stride of the destination matrix in Y dimension - * (in bytes) - * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination - * matrix - * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in - * bytes) - * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in - * bytes) - * @param[in] dst_stride_z Stride of the destination tensor in Z dimension - * (in bytes) - * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for - * the input tensor (only if defined REINTERPRET_INPUT_AS_3D) - * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements for - * the output tensor (only if defined REINTERPRET_OUTPUT_AS_3D) - */ -__kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1), - IMAGE_DECLARATION(dst), uint src0_stride_z, uint src1_stride_z, - uint dst_stride_z -#if defined(REINTERPRET_INPUT_AS_3D) - , - uint src_cross_plane_pad -#endif // REINTERPRET_INPUT_AS_3D -#if 
defined(REINTERPRET_OUTPUT_AS_3D) - , - uint dst_cross_plane_pad -#endif // REINTERPRET_OUTPUT_AS_3D - ) -{ - int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X; - - // Compute starting address for matrix A and Matrix B - int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes)); - - // Update address for the matrix A - src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y; - - // Update address for the matrix B - src_addr.s1 += idx; - -#if defined(REINTERPRET_INPUT_AS_3D) - // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across - // the z dimension - // in order to take into account the presence of possible cross plane paddings - // - // | | - // | plane0 | - // | | - // |__________________| - // |******************| - // | cross_plane_pad | - // |******************| - // | | - // | plane1 | - // | | - // |__________________| - - // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) - // by HEIGHT_GEMM3D - uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) / - (uint4)HEIGHT_GEMM3D; - zin = min(DEPTH_GEMM3D - 1, zin); - - // Add offset due to the cross plane paddings - zin *= (src_cross_plane_pad * src0_stride_y); - - // Add offset for batched GEMM. 
The batches will be in the fourth dimension and for this reason we - // multiply src0_stride_z by DEPTH_GEMM3D - src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D; - -#else // defined(REINTERPRET_INPUT_AS_3D) - - // Add offset for batched GEMM - src_addr.s0 += get_global_id(2) * src0_stride_z; - -#endif // defined(REINTERPRET_INPUT_AS_3D) - -#if defined(MATRIX_B_DEPTH) - // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3 - src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z; -#else // defined(MATRIX_B_DEPTH) - src_addr.s1 += get_global_id(2) * src1_stride_z; -#endif // defined(MATRIX_B_DEPTH) - - int end_row_vec_a = src_addr.s0 + COLS_A; - - VECTOR_INT acc0 = 0; -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 - VECTOR_INT acc1 = 0; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 - VECTOR_INT acc2 = 0; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - VECTOR_INT acc3 = 0; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - VECTOR_INT acc4 = 0; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - - for (; src_addr.s0 <= (end_row_vec_a - 2); src_addr += (int2)(2, 2 * src1_stride_y)) - { - // Load values from matrix A - char2 a0 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y)); -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 - char2 a1 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 - char2 a2 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - char2 a3 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - char2 a4 = vload2(0, (__global 
char *)(src0_ptr + src_addr.s0 + 4 * src0_stride_y)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - // Load values from matrix B - VECTOR_CHAR b0 = - VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1)); - VECTOR_CHAR b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)( - 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y)); - - // Accumulate - acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0.s0; - acc0 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a0.s1; -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 - acc1 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a1.s0; - acc1 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a1.s1; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 - acc2 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a2.s0; - acc2 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a2.s1; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - acc3 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a3.s0; - acc3 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a3.s1; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - acc4 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a4.s0; - acc4 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a4.s1; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - } - - for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(1, src1_stride_y)) - { - // Load values from matrix A - char a0 = *(__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y); -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 - char a1 = *(__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 - char a2 = *(__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - char a3 = *(__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 -#if 
NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - char a4 = *(__global char *)(src0_ptr + src_addr.s0 + 4 * src0_stride_y); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - // Load values from matrix B - VECTOR_CHAR b0 = - VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1)); - - // Accumulate - acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0; -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 - acc1 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a1; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 - acc2 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a2; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - acc3 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a3; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - acc4 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a4; -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - } - - const int z = get_global_id(2); - - // Compute destination address - Image dst = CONVERT_TO_IMAGE_STRUCT(dst); - -#if defined(REINTERPRET_OUTPUT_AS_3D) - // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across - // the z dimension - // in order to take into account the presence of possible cross plane paddings - // - // | | - // | plane0 | - // | | - // |__________________| - // |******************| - // | cross_plane_pad | - // |******************| - // | | - // | plane1 | - // | | - // |__________________| - - // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) - // by HEIGHT_GEMM3D - uint8 zout = ((uint8)(0, 1, 2, 3, 4, 5, 6, 7) + - (uint8)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) / - (uint8)HEIGHT_GEMM3D; - zout = min(DEPTH_GEMM3D - 1, zout); - - // Add offset due to the cross plane paddings - zout *= (dst_cross_plane_pad * dst_stride_y); - - // Add offset for batched GEMM. 
The batches will be in the fourth dimension and for this reason we - // multiply dst_stride_z by DEPTH_GEMM3D - dst.ptr += z * dst_stride_z * DEPTH_GEMM3D; - - // Store the result - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc0, VECTOR_INT), 0, (__global int *)(dst.ptr + 0 * dst_stride_y + zout.s0)); -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc1, VECTOR_INT), 0, (__global int *)(dst.ptr + 1 * dst_stride_y + zout.s1)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc2, VECTOR_INT), 0, (__global int *)(dst.ptr + 2 * dst_stride_y + zout.s2)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc3, VECTOR_INT), 0, (__global int *)(dst.ptr + 3 * dst_stride_y + zout.s3)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc4, VECTOR_INT), 0, (__global int *)(dst.ptr + 4 * dst_stride_y + zout.s4)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - -#else // defined(REINTERPRET_OUTPUT_AS_3D) - // Add offset for batched GEMM - dst.ptr += z * dst_stride_z; - - // Store the result - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc0, VECTOR_INT), 0, (__global int *)(dst.ptr + 0 * dst_stride_y)); -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc1, VECTOR_INT), 0, (__global int *)(dst.ptr + 1 * dst_stride_y)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc2, VECTOR_INT), 0, (__global int *)(dst.ptr + 2 * dst_stride_y)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc3, 
VECTOR_INT), 0, (__global int *)(dst.ptr + 3 * dst_stride_y)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 -#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 - VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) - (CONVERT(acc4, VECTOR_INT), 0, (__global int *)(dst.ptr + 4 * dst_stride_y)); -#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 -#endif // defined(REINTERPRET_OUTPUT_AS_3D) -} -#endif // defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && - // defined(COLS_A) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl deleted file mode 100644 index a4f7dbd48..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS) -/** Perform hashtable_lookup of input tensor - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. - * -DDATA_TYPE=short - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @attention Output tensor depth should be given as a preprocessor argument using - * -DDEPTH_OUT=depth. e.g. -DDEPTH_OUT=16 - * @attention Number of input dimensions are passed as a preprocessor argument using - * -DNUM_DIMS=size, e.g. -DNUM_DIMS=4 - * - * @param[in] input_ptr Pointer to the source tensor. 
Supported data - * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[in] input_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] input_step_w input_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination tensor. 
Supported - * data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension - * (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W dimension - * (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination tensor - * @param[in] lookups_ptr Pointer to the lookups vector. Supported data - * types: S32 - * @param[in] lookups_stride_x Stride of the lookups vector in X dimension (in - * bytes) - * @param[in] lookups_step_x lookups_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] lookups_offset_first_element_in_bytes The offset of the first element in the lookups - * vector - * - * @note When the index read from the lookups vector is negative, zeros are written to the output - * and the input is not read. - */ -__kernel void hashtable_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output), - VECTOR_DECLARATION(lookups)) -{ - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, DEPTH_OUT); - - Vector lups = CONVERT_TO_VECTOR_STRUCT_NO_STEP(lookups); - - int lup_id[4] = {0}; - - lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) - : get_global_id(0); - lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) - : get_global_id(1); - lup_id[2] = (NUM_DIMS == 3) ? 
*((__global int *)vector_offset(&lups, get_global_id(2))) - : get_global_id(2) % DEPTH_OUT; - lup_id[3] = (NUM_DIMS == 4) - ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT)) - : get_global_id(2) / DEPTH_OUT; - - // A negative lookup index produces a zero-filled output for this work-item. - if (lup_id[NUM_DIMS - 1] < 0) - { - VSTORE(VEC_SIZE)((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0, 0, (__global DATA_TYPE *)out.ptr); - return; - } - - // CONVERT_TO_TENSOR4D_STRUCT_NO_STEP has already advanced in.ptr by - // input_offset_first_element_in_bytes, so only the per-dimension offsets are added here. - in.ptr += lup_id[0] * input_step_x + lup_id[1] * input_step_y + lup_id[2] * input_step_z + - lup_id[3] * input_step_w; - - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, - (__global DATA_TYPE *)out.ptr); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h deleted file mode 100644 index e07a25ec9..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h +++ /dev/null @@ -1,571 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_HELPER_H -#define ARM_COMPUTE_HELPER_H - -#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16) -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16) - -#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable -#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8) - -#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \ - defined(cl_arm_integer_dot_product_accumulate_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable -#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && - // defined(cl_arm_integer_dot_product_accumulate_int8) - -#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf) -#pragma OPENCL EXTENSION cl_arm_printf : enable -#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf) - -#define GPU_ARCH_MIDGARD 0x100 -#define GPU_ARCH_BIFROST 0x200 - -/** Concatenate two inputs. - * - * @param[in] a The first input to be concatenated - * @param[in] b The second input to be concatenated - * - * @return The concatenated output - */ -#define CONCAT(a, b) a##b - -/** Expand the given vector - * - * @param[in] x The vector to be expanded - * - * @return The expanded output - */ -#define EXPAND(x) x - -/** Clamp the given value between an upper and lower bound. - * - * @param[in] x The value to be clamped - * @param[in] min_val The lower bound - * @param[in] max_val The upper bound - * - * @return The clamped value. - */ -#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val) - -/** REVn reverses the given vector whose size is n. 
- * @name REVn - * - * @param[in] x The vector to be reversed - * - * @return The reversed vector - * @{ - */ -#define REV1(x) ((x)) -#define REV2(x) ((x).s10) -#define REV3(x) ((x).s210) -#define REV4(x) ((x).s3210) -#define REV8(x) ((x).s76543210) -#define REV16(x) ((x).sFEDCBA9876543210) -/** @} */ // end of group REVn - -/** Reverse the given vector. - * @name REVERSE - * - * @param[in] x The vector to be reversed - * @param[in] s The size of the vector - * - * @return The reversed vector - * @{ - */ -#define REVERSE_STR(x, s) REV##s((x)) -#define REVERSE(x, s) REVERSE_STR(x, s) -/** @} */ // end of group REVERSE - -/** Circular-right-shift (rotate-right) the vector of size s by the amount of n. - * @name ROTs_n - * - * @param[in] x The vector to be shifted - * - * @return The shifted vector - * @{ - */ -#define ROT1_0(x) ((x)) - -#define ROT2_0(x) ((x)) -#define ROT2_1(x) ((x).s10) - -#define ROT3_0(x) ((x)) -#define ROT3_1(x) ((x).s201) -#define ROT3_2(x) ((x).s120) - -#define ROT4_0(x) ((x)) -#define ROT4_1(x) ((x).s3012) -#define ROT4_2(x) ((x).s2301) -#define ROT4_3(x) ((x).s1230) - -#define ROT8_0(x) ((x)) -#define ROT8_1(x) ((x).s70123456) -#define ROT8_2(x) ((x).s67012345) -#define ROT8_3(x) ((x).s56701234) -#define ROT8_4(x) ((x).s45670123) -#define ROT8_5(x) ((x).s34567012) -#define ROT8_6(x) ((x).s23456701) -#define ROT8_7(x) ((x).s12345670) - -#define ROT16_0(x) ((x)) -#define ROT16_1(x) ((x).sF0123456789ABCDE) -#define ROT16_2(x) ((x).sEF0123456789ABCD) -#define ROT16_3(x) ((x).sDEF0123456789ABC) -#define ROT16_4(x) ((x).sCDEF0123456789AB) -#define ROT16_5(x) ((x).sBCDEF0123456789A) -#define ROT16_6(x) ((x).sABCDEF0123456789) -#define ROT16_7(x) ((x).s9ABCDEF012345678) -#define ROT16_8(x) ((x).s89ABCDEF01234567) -#define ROT16_9(x) ((x).s789ABCDEF0123456) -#define ROT16_10(x) ((x).s6789ABCDEF012345) -#define ROT16_11(x) ((x).s56789ABCDEF01234) -#define ROT16_12(x) ((x).s456789ABCDEF0123) -#define ROT16_13(x) ((x).s3456789ABCDEF012) -#define 
ROT16_14(x) ((x).s23456789ABCDEF01) -#define ROT16_15(x) ((x).s123456789ABCDEF0) -/** @} */ // end of group ROTs_n - -/** Circular-right-shift (rotate-right) the given vector by the given amount. - * @name ROTATE - * - * @param[in] x The vector to be shifted - * @param[in] s The size of the vector - * @param[in] n The amount to be shifted - * - * @return The shifted vector - * @{ - */ -#define ROTATE_STR(x, s, n) ROT##s##_##n(x) -#define ROTATE(x, s, n) ROTATE_STR(x, s, n) -/** @} */ // end of group ROTATE - -/** Creates a vector of size n filled with offset values corresponding to the location of each - * element, i.e. element i holds the value i. - * @name V_OFFSn - * - * @param[in] dt The data type of the output vector - * - * @return The vector filled with offset values - * @{ - */ -#define V_OFFS1(dt) (dt)(0) -#define V_OFFS2(dt) (dt)(0, 1) -#define V_OFFS3(dt) (dt)(0, 1, 2) -#define V_OFFS4(dt) (dt)(0, 1, 2, 3) -#define V_OFFS8(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7) -#define V_OFFS16(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) -/** @} */ // end of group V_OFFSn - -/** Create a vector filled with offset values corresponding to the location of each element. 
- * @name VEC_OFFS - * - * @param[in] dt The data type of the output vector - * @param[in] s The size of the output vector - * - * @return The vector filled with offset values - * @{ - */ -#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt) -#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s) -/** @} */ // end of group VEC_OFFS - -#define VLOAD_STR(size) vload##size -#define VLOAD(size) VLOAD_STR(size) - -#define VSTORE_STR(size) vstore##size -#define VSTORE(size) VSTORE_STR(size) - -// Size-1 "vector" type names, so that VEC_DATA_TYPE(type, 1) resolves to the scalar type -#define float1 float -#define half1 half -#define char1 char -#define uchar1 uchar -#define short1 short -#define ushort1 ushort -#define int1 int -#define uint1 uint -#define long1 long -#define ulong1 ulong -#define double1 double - -// Scalar counterparts of vloadn/vstoren, so that VLOAD(1)/VSTORE(1) work. -// Arguments and results are parenthesized so that expression arguments expand safely. -#define vload1(OFFSET, PTR) (*((OFFSET) + (PTR))) -#define vstore1(DATA, OFFSET, PTR) (*((OFFSET) + (PTR)) = (DATA)) - -// The convert_* built-ins do not support the _sat modifier for floating-point destination types, -// so the _sat names are mapped onto the plain conversions of the same destination type -#define convert_float_sat convert_float -#define convert_float1_sat convert_float -#define convert_float2_sat convert_float2 -#define convert_float3_sat convert_float3 -#define convert_float4_sat convert_float4 -#define convert_float8_sat convert_float8 -#define convert_float16_sat convert_float16 -#define convert_half_sat convert_half -#define convert_half1_sat convert_half -#define convert_half2_sat convert_half2 -#define convert_half3_sat convert_half3 -#define convert_half4_sat convert_half4 -#define convert_half8_sat convert_half8 -#define convert_half16_sat convert_half16 - -#define convert_float1 convert_float -#define convert_half1 convert_half -#define convert_char1 convert_char -#define convert_uchar1 convert_uchar -#define convert_short1 convert_short -#define convert_ushort1 convert_ushort -#define convert_int1 convert_int -#define convert_uint1 convert_uint -#define convert_long1 convert_long -#define convert_ulong1 convert_ulong -#define convert_double1 convert_double - -#define convert_char1_sat 
convert_char_sat -#define convert_uchar1_sat convert_uchar_sat -#define convert_short1_sat convert_short_sat -#define convert_ushort1_sat convert_ushort_sat -#define convert_int1_sat convert_int_sat -#define convert_uint1_sat convert_uint_sat -#define convert_long1_sat convert_long_sat -#define convert_ulong1_sat convert_ulong_sat -#define convert_double1_sat convert_double_sat - -#define VEC_DATA_TYPE_STR(type, size) type##size -#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size) - -#define CL_VEC_DATA_TYPE_STR(type, size) type##size -#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size) - -#define CONVERT_STR(x, type) (convert_##type((x))) -#define CONVERT(x, type) CONVERT_STR(x, type) - -#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x))) -#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type) - -#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x))) -#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round) - -#define VECTOR_DECLARATION(name) \ - __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \ - uint name##_offset_first_element_in_bytes - -#define IMAGE_DECLARATION(name) \ - __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_offset_first_element_in_bytes - -#define TENSOR3D_DECLARATION(name) \ - __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_stride_z, uint name##_step_z, \ - uint name##_offset_first_element_in_bytes - -#define TENSOR4D_DECLARATION(name) \ - __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \ - uint name##_step_w, uint name##_offset_first_element_in_bytes - -#define CONVERT_TO_VECTOR_STRUCT(name) \ - update_vector_workitem_ptr(name##_ptr, 
name##_offset_first_element_in_bytes, name##_stride_x, \ - name##_step_x) - -#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \ - update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0) - -#define CONVERT_TO_IMAGE_STRUCT(name) \ - update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - name##_step_x, name##_stride_y, name##_step_y) - -#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \ - update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, \ - name##_stride_y, 0) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ - update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \ - name##_stride_x, name##_step_x, name##_stride_y, \ - name##_step_y, name##_stride_z, name##_step_z) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \ - update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \ - name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, \ - name##_step_z) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ - update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \ - name##_stride_x, name##_step_x, name##_stride_y, \ - name##_step_y, name##_stride_z, name##_step_z) - -#define CONVERT_TO_TENSOR3D_STRUCT(name) \ - update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \ - name##_step_z) - -#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \ - update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - 0, name##_stride_y, 0, name##_stride_z, 0) - -#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \ - update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \ - name##_step_z, 
name##_stride_w, name##_step_w, mod_size) - -#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \ - update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, \ - mod_size) - -/** Structure to hold Vector information */ -typedef struct Vector -{ - __global uchar *ptr; /**< Pointer to the starting position of the buffer */ - int offset_first_element_in_bytes; /**< The offset of the first element in the source vector */ - int stride_x; /**< Stride of the vector in X dimension (in bytes) */ -} Vector; - -/** Structure to hold Image information */ -typedef struct Image -{ - __global uchar *ptr; /**< Pointer to the starting position of the buffer */ - int offset_first_element_in_bytes; /**< The offset of the first element in the source image */ - int stride_x; /**< Stride of the image in X dimension (in bytes) */ - int stride_y; /**< Stride of the image in Y dimension (in bytes) */ -} Image; - -/** Structure to hold 3D tensor information */ -typedef struct Tensor3D -{ - __global uchar *ptr; /**< Pointer to the starting position of the buffer */ - int offset_first_element_in_bytes; /**< The offset of the first element in the source tensor */ - int stride_x; /**< Stride of the tensor in X dimension (in bytes) */ - int stride_y; /**< Stride of the tensor in Y dimension (in bytes) */ - int stride_z; /**< Stride of the tensor in Z dimension (in bytes) */ -} Tensor3D; - -/** Structure to hold 4D tensor information */ -typedef struct Tensor4D -{ - __global uchar *ptr; /**< Pointer to the starting position of the buffer */ - int offset_first_element_in_bytes; /**< The offset of the first element in the source tensor */ - int stride_x; /**< Stride of the tensor in X dimension (in bytes) */ - int stride_y; /**< Stride of the tensor in Y dimension (in bytes) */ - int stride_z; /**< Stride of the tensor in Z dimension (in bytes) */ - int stride_w; /**< Stride of the tensor in W 
dimension (in bytes) */ -} Tensor4D; - -/** Wrap vector information into a Vector structure, and make the pointer point at this workitem's - * data. - * - * @param[in] ptr Pointer to the starting position of the buffer - * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector - * @param[in] stride_x Stride of the vector in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per - * workitem(in bytes) - * - * @return A vector object - */ -inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, - uint stride_x, uint step_x) -{ - Vector vector = { - .ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - }; - vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x; - return vector; -} - -/** Wrap image information into an Image structure, and make the pointer point at this workitem's - * data. - * - * @param[in] ptr Pointer to the starting position of the buffer - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per - * workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per - * workitem(in bytes) - * - * @return An image object - */ -inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, - uint stride_x, uint step_x, uint stride_y, uint step_y) -{ - Image img = {.ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - .stride_y = stride_y}; - img.ptr += - img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y; - return img; -} - -/** Wrap 3D tensor information into an 
image structure, and make the pointer point at this - * workitem's data. - * - * @param[in] ptr Pointer to the starting position of the buffer - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per - * workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per - * workitem(in bytes) - * @param[in] stride_z Stride of the image in Z dimension (in bytes) - * @param[in] step_z stride_z * number of elements along Z processed per - * workitem(in bytes) - * - * @return An image object - */ -inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, - uint offset_first_element_in_bytes, - uint stride_x, uint step_x, uint stride_y, - uint step_y, uint stride_z, uint step_z) -{ - Image img = {.ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - .stride_y = stride_y}; - img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + - get_global_id(1) * step_y + get_global_id(2) * step_z; - return img; -} - -/** Wrap 3D tensor information into a tensor structure, and make the pointer point at this - * workitem's data. 
- * - * @param[in] ptr Pointer to the starting postion of the buffer - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per - * workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per - * workitem(in bytes) - * @param[in] stride_z Stride of the image in Z dimension (in bytes) - * @param[in] step_z stride_z * number of elements along Z processed per - * workitem(in bytes) - * - * @return A 3D tensor object - */ -inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, - uint offset_first_element_in_bytes, uint stride_x, - uint step_x, uint stride_y, uint step_y, uint stride_z, - uint step_z) -{ - Tensor3D tensor = {.ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - .stride_y = stride_y, - .stride_z = stride_z}; - tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + - get_global_id(1) * step_y + get_global_id(2) * step_z; - return tensor; -} - -inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, - uint offset_first_element_in_bytes, uint stride_x, - uint step_x, uint stride_y, uint step_y, uint stride_z, - uint step_z, uint stride_w, uint step_w, uint mod_size) -{ - Tensor4D tensor = {.ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - .stride_y = stride_y, - .stride_z = stride_z, - .stride_w = stride_w}; - - tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + - get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + - (get_global_id(2) / mod_size) * step_w; - return tensor; -} - -/** Get the pointer position of a Vector - * - * @param[in] vec Pointer to the starting position of the 
buffer - * @param[in] x Relative X position - */ -inline __global const uchar *vector_offset(const Vector *vec, int x) -{ - return vec->ptr + x * vec->stride_x; -} - -/** Get the pointer position of an Image - * - * @param[in] img Pointer to the Image structure - * @param[in] x Relative X position - * @param[in] y Relative Y position - * - * @return img->ptr advanced by x * stride_x + y * stride_y bytes - */ -inline __global uchar *offset(const Image *img, int x, int y) -{ - return img->ptr + x * img->stride_x + y * img->stride_y; -} - -/** Get the pointer position of a Tensor3D - * - * @param[in] tensor Pointer to the Tensor3D structure - * @param[in] x Relative X position - * @param[in] y Relative Y position - * @param[in] z Relative Z position - * - * @return tensor->ptr advanced by x * stride_x + y * stride_y + z * stride_z bytes - */ -inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z) -{ - return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z; -} - -/** Get the pointer position of a Tensor4D - * - * @param[in] tensor Pointer to the Tensor4D structure - * @param[in] x Relative X position - * @param[in] y Relative Y position - * @param[in] z Relative Z position - * @param[in] w Relative W position - * - * @return tensor->ptr advanced by x * stride_x + y * stride_y + z * stride_z + w * stride_w bytes - */ -inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w) -{ - return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + - w * tensor->stride_w; -} - -#endif // ARM_COMPUTE_HELPER_H diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h deleted file mode 100644 index 5f1b3f902..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_HELPERS_ASYMM_H -#define ARM_COMPUTE_HELPERS_ASYMM_H - -#include "helpers.h" - -/** Convert the given vector with round to nearest even rounding mode - * - * @param[in] x The target to be converted - * @param[in] type The target type - * - * @return The converted vector - */ -#define CONVERT_DOWN_RTE_STR(x, type) (convert_##type##_rte((x))) -#define CONVERT_DOWN_RTE(x, type) CONVERT_DOWN_RTE_STR(x, type) - -/** Quantize a floating-point scalar value to 8-bit asymmetric - * - * Computes input / scale + offset, converts it to int with round-to-nearest-even and - * saturates the result to uchar. - * - * @param[in] input Input value to quantize - * @param[in] offset Quantization offset - * @param[in] scale Quantization scale - * - * @return quantized value - */ -inline uchar quantize_qasymm8(float input, float offset, float scale) -{ - float out_f32 = input / scale + offset; - uchar res_u8 = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, int), uchar); - return res_u8; -} - -/** Dequantize a scalar value from 8-bit asymmetric to floating-point - * - * @param[in] input Input value to dequantize - * @param[in] offset Quantization offset - * @param[in] scale Quantization scale - * - * @return dequantized value, (input - offset) * scale - */ -inline float dequantize_qasymm8(uchar input, float offset, float scale) -{ - return ((float)input - offset) * scale; -} - -/** Dequantize a scalar value from signed 8-bit asymmetric to floating-point - * - * @param[in] input Input value to dequantize - * @param[in] offset Quantization offset - * @param[in] scale Quantization scale - * - * @return dequantized value, (input - offset) * scale - */ -inline float dequantize_qasymm8_signed(char input, float offset, float scale) -{ - return ((float)input - offset) * scale; -} - -/** Quantize a vector of values from floating-point - * - * @param[in] type Output data type. - * @param[in] size Size of vector. 
- * - * @return quantized values - */ -#define QUANTIZE_IMPL(type, size) \ - inline VEC_DATA_TYPE(type, size) \ - quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \ - { \ - VEC_DATA_TYPE(float, size) \ - out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \ - VEC_DATA_TYPE(type, size) \ - res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), \ - VEC_DATA_TYPE(type, size)); \ - return res; \ - } - -/** Dequantize a vector of values to floating-point - * - * @param[in] type Input data type. - * @param[in] size Size of vector. - * - * @return dequantized values in floating point - */ -#define DEQUANTIZE_IMPL(type, size) \ - inline VEC_DATA_TYPE(float, size) \ - dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \ - { \ - return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \ - } - -/** Correctly-rounded-to-nearest division by a power-of-two. - * - * @param[in] size Size of vector. - * - * @return Correctly-rounded-to-nearest division by a power-of-two. - */ -#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_rounding_divide_by_POW2_##size( \ - VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \ - { \ - const VEC_DATA_TYPE(int, size) zero = (VEC_DATA_TYPE(int, size))0; \ - const VEC_DATA_TYPE(int, size) one = (VEC_DATA_TYPE(int, size))1; \ - VEC_DATA_TYPE(int, size) \ - mask = (one << exponent) - one; \ - VEC_DATA_TYPE(int, size) \ - threshold = (mask >> 1) + select(zero, one, x < 0); \ - return (x >> exponent) + select(zero, one, (x & mask) > threshold); \ - } - -/** Product of two numbers, interpreting them as fixed-point values in the interval [-1, 1), - * rounding to the nearest value, and saturating -1 * -1 to the maximum value. - * - * @param[in] size Size of vector. - * - * @return Product of two fixed-point numbers. 
- */ -#define ASYMM_MULT_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \ - { \ - VEC_DATA_TYPE(int, size) \ - overflow = a == b && a == INT_MIN; \ - VEC_DATA_TYPE(long, size) \ - a_64 = convert_long##size(a); \ - VEC_DATA_TYPE(long, size) \ - b_64 = convert_long##size(b); \ - VEC_DATA_TYPE(long, size) \ - ab_64 = a_64 * b_64; \ - /* Revert COMPMID-907 */ \ - VEC_DATA_TYPE(long, size) \ - mask1 = 1 << 30; \ - VEC_DATA_TYPE(long, size) \ - mask2 = 1 - (1 << 30); \ - VEC_DATA_TYPE(long, size) \ - is_positive_or_zero = ab_64 >= 0; \ - VEC_DATA_TYPE(long, size) \ - nudge = select(mask2, mask1, is_positive_or_zero); \ - VEC_DATA_TYPE(long, size) \ - mask = 1ll << 31; \ - VEC_DATA_TYPE(int, size) \ - ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \ - return select(ab_x2_high32, INT_MAX, overflow); \ - } - -/** Calculates \f$ exp(x) \f$ for x in [-1/4, 0). - * - * @param[in] size Size of vector. - * - * @return Result in fixed-point format Q0. 
- */ -#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \ - a) \ - { \ - const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \ - const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \ - const int k_fractional_bits = 31; \ - VEC_DATA_TYPE(int, size) \ - x = a + (1 << (k_fractional_bits - 3)); \ - VEC_DATA_TYPE(int, size) \ - x2 = ASYMM_MULT(x, x, size); \ - VEC_DATA_TYPE(int, size) \ - x3 = ASYMM_MULT(x2, x, size); \ - VEC_DATA_TYPE(int, size) \ - x4 = ASYMM_MULT(x2, x2, size); \ - VEC_DATA_TYPE(int, size) \ - x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \ - VEC_DATA_TYPE(int, size) \ - x4_over_24_plus_x3_over_6_plus_x2 = \ - ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \ - VEC_DATA_TYPE(int, size) \ - x4_over_24_plus_x3_over_6_plus_x2_over_2 = \ - ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \ - return constant_term + \ - ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \ - } - -/** Each bit of the result is set to the corresponding bit of either then_val or - * else_val depending on whether the corresponding bit of if_mask is set. - * Equivalent to the VBSL instruction in ARM NEON. - * - * @param[in] size Size of vector. - * - * @returns Result contaning bits from @p then_val or from @p else_val depending on corresponding - * bit in @p if_mask is set or not. - */ -#define ASYMM_SELECT_USING_MASK_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_select_using_mask##size(VEC_DATA_TYPE(int, size) if_mask, \ - VEC_DATA_TYPE(int, size) then_val, \ - VEC_DATA_TYPE(int, size) else_val) \ - { \ - return (if_mask & then_val) ^ (~if_mask & else_val); \ - } - -/** For each element of input vector, the corresponding bits of the result item are set - * if the input item is zero. 
- * - * @param[in] size Size of vector. - * - * @returns Output vector with bits set when corresponding bit in @p a is zero. - */ -#define ASYMM_MASK_IF_ZERO_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_mask_if_zero##size(VEC_DATA_TYPE(int, size) a) \ - { \ - const VEC_DATA_TYPE(int, size) all_zeros = 0; \ - const VEC_DATA_TYPE(int, size) all_ones = ~0; \ - return select(all_zeros, all_ones, a == 0); \ - } - -/** For each element of input vector, the corresponding bits of the result item are set - * if the input item is non-zero. - * - * @param[in] size Size of vector. - * - * @returns Output vector with bits set when corresponding bit in @p a is non zero. - */ -#define ASYMM_MASK_IF_NON_ZERO_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_mask_if_non_zero##size(VEC_DATA_TYPE(int, size) a) \ - { \ - const VEC_DATA_TYPE(int, size) all_zeros = 0; \ - const VEC_DATA_TYPE(int, size) all_ones = ~0; \ - return select(all_zeros, all_ones, a != 0); \ - } - -#define EXP_BARREL_SHIFTER_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) exp_barrel_shifter##size( \ - VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \ - int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \ - { \ - if (k_integer_bits > exponent) \ - { \ - const int k_shift_amount = k_integer_bits > exponent ? k_fractional_bits + exponent : 0; \ - return ASYMM_SELECT_USING_MASK( \ - ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \ - ASYMM_MULT(result, fp_multiplier, size), result, size); \ - } \ - \ - return result; \ - } - -/** Calculates \f$ exp(x) \f$ for x < 0. - * - * @param[in] size Size of vector. - * - * @return Result in fixed-point format Q0. 
- */ -#define ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \ - { \ - const int k_fractional_bits = 31 - k_integer_bits; \ - VEC_DATA_TYPE(int, size) \ - k_one_quarter = 1 << (k_fractional_bits - 2); \ - VEC_DATA_TYPE(int, size) \ - mask = k_one_quarter - 1; \ - VEC_DATA_TYPE(int, size) \ - a_mod_quarter_minus_one_quarter = (a & mask) - k_one_quarter; \ - VEC_DATA_TYPE(int, size) \ - a_mod_quarter_minus_one_quarter_scaled = a_mod_quarter_minus_one_quarter << k_integer_bits; \ - VEC_DATA_TYPE(int, size) \ - result = ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL( \ - a_mod_quarter_minus_one_quarter_scaled, size); \ - VEC_DATA_TYPE(int, size) \ - remainder = a_mod_quarter_minus_one_quarter - a; \ - \ - result = EXP_BARREL_SHIFTER(result, -2, 1672461947, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, -1, 1302514674, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +0, 790015084, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +1, 290630308, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +2, 39332535, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, \ - size); \ - result = \ - EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \ - \ - if (k_integer_bits > 5) \ - { \ - const VEC_DATA_TYPE(int, size) clamp = -(1 << (k_fractional_bits + 5)); \ - result = ASYMM_SELECT_USING_MASK(ASYMM_MASK_IF_NON_ZERO(a < clamp, size), 0, result, size); \ - } \ - \ - const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \ - return ASYMM_SELECT_USING_MASK(ASYMM_MASK_IF_ZERO(a, size), Q0_one, result, size); \ - } - -/** Calculates 
the product of a integer value by a power of two, with either a positive exponent - * (equivalent to an arithmetic left shift, saturating) or a negative exponent - * (equivalent to an arithmetic right shift, rounding to nearest). - * - * @param[in] size Size of vector. - * - * @return Arithmetic left or right shift. - */ -#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \ - { \ - if (exponent < 0) \ - { \ - return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \ - } \ - \ - const VEC_DATA_TYPE(int, size) min = INT_MIN; \ - const VEC_DATA_TYPE(int, size) max = INT_MAX; \ - int threshold = ((1 << (31 - exponent)) - 1); \ - VEC_DATA_TYPE(int, size) \ - positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \ - VEC_DATA_TYPE(int, size) \ - negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \ - VEC_DATA_TYPE(int, size) \ - result = x << exponent; \ - result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \ - result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \ - return result; \ - } - -/** Calculates (a+b)/2, rounded to the nearest integer. - * Equivalent to VRHADD in the ARM NEON instruction set. - * - * @param[in] size Size of vector. - * - * @return (a+b)/2, rounded to the nearest integer. 
- */ -#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \ - { \ - VEC_DATA_TYPE(long, size) \ - a64 = convert_long##size(a); \ - VEC_DATA_TYPE(long, size) \ - b64 = convert_long##size(b); \ - VEC_DATA_TYPE(long, size) \ - sum = a64 + b64; \ - const VEC_DATA_TYPE(long, size) one = 1; \ - const VEC_DATA_TYPE(long, size) minus_one = -1; \ - VEC_DATA_TYPE(long, size) \ - sign = select(minus_one, one, sum >= 0); \ - return convert_int##size((sum + sign) / 2); \ - } - -/** Calculates \f$ 1 / (1 + x) \f$ for x in (0, 1). - * - * @param[in] size Size of vector. - * - * @return Result in fixed-point format Q0. - */ -#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \ - { \ - const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \ - const VEC_DATA_TYPE(int, size) Q2_one = 1 << (31 - 2); \ - VEC_DATA_TYPE(int, size) \ - half_denominator = ASYMM_ROUNDING_HALF_SUM(a, Q0_one, size); \ - const VEC_DATA_TYPE(int, size) Q2_48_over_17 = 1515870810; \ - const VEC_DATA_TYPE(int, size) Q2_neg_32_over_17 = -1010580540; \ - VEC_DATA_TYPE(int, size) \ - x = Q2_48_over_17 + ASYMM_MULT(half_denominator, Q2_neg_32_over_17, size); \ - for (int i = 0; i < 3; i++) \ - { \ - VEC_DATA_TYPE(int, size) \ - half_denominator_times_x = ASYMM_MULT(half_denominator, x, size); \ - VEC_DATA_TYPE(int, size) \ - one_minus_half_denominator_times_x = Q2_one - half_denominator_times_x; \ - VEC_DATA_TYPE(int, size) \ - tmp = ASYMM_MULT(x, one_minus_half_denominator_times_x, size); \ - x = x + ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(tmp, 2, size); \ - } \ - return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(x, 1, size); \ - } - -/** Considering the integer value as fixed-point, change the number of integer bits and update value - * accordingly. - * - * @param[in] size Size of vector. 
- * - * @return Rescaled value. - */ -#define ASYMM_RESCALE_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_rescale##size(VEC_DATA_TYPE(int, size) value, \ - int src_integer_bits, int dst_integer_bits) \ - { \ - int exponent = src_integer_bits - dst_integer_bits; \ - return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(value, exponent, size); \ - } - -#define QUANTIZE_STR(input, offset, scale, type, size) quantize_##type##size(input, offset, scale) -#define QUANTIZE(input, offset, scale, type, size) QUANTIZE_STR(input, offset, scale, type, size) -#define DEQUANTIZE_STR(input, offset, scale, type, size) \ - dequantize_##type##size(input, offset, scale) -#define DEQUANTIZE(input, offset, scale, type, size) \ - DEQUANTIZE_STR(input, offset, scale, type, size) - -#define ASYMM_ROUNDING_DIVIDE_BY_POW2(x, exponent, size) \ - asymm_rounding_divide_by_POW2_##size(x, exponent) -#define ASYMM_MULT(a, b, size) asymm_mult##size(a, b) -#define ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(x, quantized_multiplier, left_shift, size) \ - ASYMM_MULT(x *((VEC_DATA_TYPE(int, size))(1) << (-left_shift)), quantized_multiplier, size) -#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(x, quantized_multiplier, right_shift, size) \ - ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(x, quantized_multiplier, size), right_shift, size) -#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL(a, size) \ - asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(a) -#define ASYMM_SELECT_USING_MASK(if_mask, then_val, else_val, size) \ - asymm_select_using_mask##size(if_mask, then_val, else_val) -#define ASYMM_MASK_IF_ZERO(a, size) asymm_mask_if_zero##size(a) -#define ASYMM_MASK_IF_NON_ZERO(a, size) asymm_mask_if_non_zero##size(a) -#define EXP_BARREL_SHIFTER(result, exponent, fp_multiplier, k_integer_bits, k_fractional_bits, \ - remainder, size) \ - exp_barrel_shifter##size(result, exponent, fp_multiplier, k_integer_bits, k_fractional_bits, \ - remainder) -#define 
ASYMM_EXP_ON_NEGATIVE_VALUES(a, k_integer_bits, size) \ - asymm_exp_on_negative_values##size(a, k_integer_bits) -#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1(a, size) \ - asymm_one_over_one_plus_x_for_x_in_0_1##size(a) -#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(x, exponent, size) \ - asymm_saturating_rounding_mult_by_pow2##size(x, exponent) -#define ASYMM_ROUNDING_HALF_SUM(a, b, size) asymm_rounding_half_sum##size(a, b) -#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \ - asymm_rescale##size(value, src_integer_bits, dst_integer_bits) - -#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \ - { \ - const int left_shift = shift > 0 ? shift : 0; \ - const int right_shift = shift > 0 ? 0 : -shift; \ - return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \ - right_shift, size); \ - } -#define MULTIPLY_BY_QUANTIZED_MULTIPLIER(input, qmul, shift, size) \ - multiply_by_quantized_multiplier##size(input, qmul, shift) - -QUANTIZE_IMPL(uchar, 1) -QUANTIZE_IMPL(char, 1) -QUANTIZE_IMPL(uint, 1) -QUANTIZE_IMPL(int, 1) -QUANTIZE_IMPL(uchar, 4) -QUANTIZE_IMPL(ushort, 4) -QUANTIZE_IMPL(short, 4) -QUANTIZE_IMPL(uchar, 16) -QUANTIZE_IMPL(char, 16) -QUANTIZE_IMPL(ushort, 16) -QUANTIZE_IMPL(short, 16) -QUANTIZE_IMPL(uint, 16) -QUANTIZE_IMPL(int, 16) - -DEQUANTIZE_IMPL(uchar, 1) -DEQUANTIZE_IMPL(char, 1) -DEQUANTIZE_IMPL(uint, 1) -DEQUANTIZE_IMPL(int, 1) -DEQUANTIZE_IMPL(uchar, 4) -DEQUANTIZE_IMPL(ushort, 4) -DEQUANTIZE_IMPL(short, 4) -DEQUANTIZE_IMPL(uchar, 16) -DEQUANTIZE_IMPL(char, 16) -DEQUANTIZE_IMPL(ushort, 16) -DEQUANTIZE_IMPL(short, 16) -DEQUANTIZE_IMPL(uint, 16) -DEQUANTIZE_IMPL(int, 16) - -ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(1) -ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2) -ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4) -ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8) -ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16) - 
-ASYMM_MULT_IMPL(1) -ASYMM_MULT_IMPL(2) -ASYMM_MULT_IMPL(4) -ASYMM_MULT_IMPL(8) -ASYMM_MULT_IMPL(16) - -ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(2) -ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(4) -ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(8) -ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(16) - -ASYMM_SELECT_USING_MASK_IMPL(1) -ASYMM_SELECT_USING_MASK_IMPL(2) -ASYMM_SELECT_USING_MASK_IMPL(4) -ASYMM_SELECT_USING_MASK_IMPL(8) -ASYMM_SELECT_USING_MASK_IMPL(16) - -ASYMM_MASK_IF_ZERO_IMPL(1) -ASYMM_MASK_IF_ZERO_IMPL(2) -ASYMM_MASK_IF_ZERO_IMPL(4) -ASYMM_MASK_IF_ZERO_IMPL(8) -ASYMM_MASK_IF_ZERO_IMPL(16) - -ASYMM_MASK_IF_NON_ZERO_IMPL(1) -ASYMM_MASK_IF_NON_ZERO_IMPL(2) -ASYMM_MASK_IF_NON_ZERO_IMPL(4) -ASYMM_MASK_IF_NON_ZERO_IMPL(8) -ASYMM_MASK_IF_NON_ZERO_IMPL(16) - -EXP_BARREL_SHIFTER_IMPL(2) -EXP_BARREL_SHIFTER_IMPL(4) -EXP_BARREL_SHIFTER_IMPL(8) -EXP_BARREL_SHIFTER_IMPL(16) - -ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(2) -ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(4) -ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(8) -ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(16) - -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(1) -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(2) -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(4) -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(8) -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(16) - -ASYMM_ROUNDING_HALF_SUM_IMPL(2) -ASYMM_ROUNDING_HALF_SUM_IMPL(4) -ASYMM_ROUNDING_HALF_SUM_IMPL(8) -ASYMM_ROUNDING_HALF_SUM_IMPL(16) - -ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(2) -ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(4) -ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(8) -ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(16) - -ASYMM_RESCALE_IMPL(1) -ASYMM_RESCALE_IMPL(2) -ASYMM_RESCALE_IMPL(4) -ASYMM_RESCALE_IMPL(8) -ASYMM_RESCALE_IMPL(16) - -MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(1) -MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(2) -MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(4) -MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(8) 
-MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(16) - -#endif // ARM_COMPUTE_HELPERS_ASYMM_H diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl deleted file mode 100644 index 014842680..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \ - defined(DIM_Y) && defined(DIM_Z) -/** This function normalizes the input 2D tensor across the first dimension with respect to mean and - * standard deviation of the same dimension. - * - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @attention Data type should be passed using the -DDATA_TYPE=data_type compile flag, e.g. - * -DDATA_TYPE=float - * @attention Normalization epsilon parameter should be given as a preprocessor argument with - * -DEPSILON=value. e.g. -DEPSILON=0.001f - * @attention Dimensions X, Y, and Z should be given as a preprocessor argument with -DDIM_X=value, - * -DDIM_Y=value, -DDIM_Z=value. e.g. -DDIM_X=6, -DDIM_Y=2, -DDIM_Z=7 - * - * @param[in] input_ptr Pointer to the first source tensor. 
Supported - * data types: F16/F32 - * @param[in] input_stride_x Stride of the first source tensor in X dimension - * (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the first source tensor in Y dimension - * (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the first source tensor in Z dimension - * (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first - * source tensor - * @param[out] output_ptr (Optional) Pointer to the destination tensor. - * Supported data types: same as @p input_ptr - * @param[in] output_stride_x (Optional) Stride of the destination tensor in X - * dimension (in bytes) - * @param[in] output_step_x (Optional) output_stride_x * number of elements - * along X processed per workitem(in bytes) - * @param[in] output_stride_y (Optional) Stride of the destination tensor in Y - * dimension (in bytes) - * @param[in] output_step_y (Optional) output_stride_y * number of elements - * along Y processed per workitem(in bytes) - * @param[in] output_stride_z (Optional) Stride of the destination tensor in Z - * dimension (in bytes) - * @param[in] output_step_z (Optional) output_stride_z * number of elements - * along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes (Optional) The offset of the first element in - * the destination tensor - * @param[in] gamma_ptr (Optional) Pointer to the gamma tensor. 
- * Supported data types: same as @p input_ptr - * @param[in] gamma_stride_x (Optional) Stride of the gamma tensor in X - * dimension (in bytes) - * @param[in] gamma_step_x (Optional) gamma_stride_x * number of elements - * along X processed per workitem(in bytes) - * @param[in] gamma_offset_first_element_in_bytes (Optional) The offset of the first element in - * the gamma tensor - * @param[in] beta_ptr (Optional) Pointer to the beta tensor. Supported - * data types: same as @p input_ptr - * @param[in] beta_stride_x (Optional) Stride of the beta tensor in X - * dimension (in bytes) - * @param[in] beta_step_x (Optional) beta_stride_x * number of elements - * along X processed per workitem(in bytes) - * @param[in] beta_offset_first_element_in_bytes (Optional) The offset of the first element in - * the beta tensor - * - * @note Each optional argument carries its own leading comma, so the parameter list stays - * well-formed when -DIN_PLACE removes the output tensor. - * @note Without -DGAMMA the scale defaults to 1; without -DBETA the shift defaults to 0. - */ -__kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input) -#ifndef IN_PLACE - , TENSOR4D_DECLARATION(output) -#endif /* IN_PLACE */ -#ifdef GAMMA - , - VECTOR_DECLARATION(gamma) -#endif // GAMMA -#ifdef BETA - , - VECTOR_DECLARATION(beta) -#endif // BETA - ) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0); -#ifndef IN_PLACE - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0); -#endif /* IN_PLACE */ - - float sum = 0.f; - float sum_sq = 0.f; - -#if defined(NHWC) - - const int ch = get_global_id(0); // Current channel - const int batch = get_global_id(2); // Current batch - const int elements_plane = DIM_Y * DIM_Z; - - for (int i_w = 0; i_w < DIM_Y; ++i_w) - { - for (int i_h = 0; i_h < DIM_Z; ++i_h) - { - float data = (float)*((__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch)); - sum += data; - sum_sq += data * data; - } - } - -#else // !defined(NHWC) - const int ch = get_global_id(2) % DIM_Z; // Current channel - const int batch = get_global_id(2) / DIM_Z; // Current batch - const int elements_plane = DIM_X * DIM_Y; - - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - part_sum = 0.f; - VEC_DATA_TYPE(DATA_TYPE,
VEC_SIZE) - part_sum_sq = 0.f; - // Calculate partial sum - for (int y = 0; y < DIM_Y; ++y) - { - int x = 0; - for (; x <= (DIM_X - VEC_SIZE); x += VEC_SIZE) - { - // Load data - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch)); - part_sum += data; - part_sum_sq += data * data; - } - // Left-overs loop - for (; x < DIM_X; ++x) - { - DATA_TYPE data = *((__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch)); - part_sum.s0 += data; - part_sum_sq.s0 += data * data; - } - } -// Perform reduction -#if VEC_SIZE > 8 - part_sum.s01234567 += part_sum.s89abcdef; - part_sum_sq.s01234567 += part_sum_sq.s89abcdef; -#endif // VEC_SIZE > 8 -#if VEC_SIZE > 4 - part_sum.s0123 += part_sum.s4567; - part_sum_sq.s0123 += part_sum_sq.s4567; -#endif // VEC_SIZE > 4 -#if VEC_SIZE > 2 - part_sum.s01 += part_sum.s23; - part_sum_sq.s01 += part_sum_sq.s23; -#endif // VEC_SIZE > 2 - part_sum.s0 += part_sum.s1; - part_sum_sq.s0 += part_sum_sq.s1; - - sum = (float)part_sum.s0; - sum_sq = (float)part_sum_sq.s0; - -#endif // defined(NHWC) - - const float mean_float = (sum / elements_plane); - const DATA_TYPE mean = (DATA_TYPE)mean_float; - const float var_float = (sum_sq / elements_plane) - (mean_float * mean_float); -#if defined(GAMMA) - const float multip_float = *((__global DATA_TYPE *)gamma_ptr + ch) / sqrt(var_float + EPSILON); - const DATA_TYPE multip = (DATA_TYPE)multip_float; -#else // !defined(GAMMA) - const DATA_TYPE multip = (DATA_TYPE)(1.f / sqrt(var_float + EPSILON)); // gamma defaults to 1; a zero factor would reduce every output to beta -#endif // defined(GAMMA) -#if defined(BETA) - const DATA_TYPE beta = *((__global DATA_TYPE *)beta_ptr + ch); -#else // !defined(BETA) - const DATA_TYPE beta = 0; -#endif // defined(BETA) - -#if defined(NHWC) - - for (int i_w = 0; i_w < DIM_Y; ++i_w) - { - for (int i_h = 0; i_h < DIM_Z; ++i_h) - { - __global DATA_TYPE *input_address = - (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch); -#ifdef IN_PLACE - __global DATA_TYPE *output_address = input_address;
-#else /* !IN_PLACE */ - __global DATA_TYPE *output_address = - (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch); -#endif /* IN_PLACE */ - *(output_address) = (*(input_address)-mean) * multip + beta; - } - } - -#else // !defined(NHWC) - for (int y = 0; y < DIM_Y; ++y) - { - int x = 0; - for (; x <= (DIM_X - VEC_SIZE); x += VEC_SIZE) - { - __global DATA_TYPE *input_address = - (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch); -#ifdef IN_PLACE - __global DATA_TYPE *output_address = input_address; -#else /* !IN_PLACE */ - __global DATA_TYPE *output_address = - (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch); -#endif /* IN_PLACE */ - - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - data = VLOAD(VEC_SIZE)(0, input_address); - - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - res = (data - mean) * multip + beta; - VSTORE(VEC_SIZE) - (res, 0, output_address); - } - // Left-overs loop - for (; x < DIM_X; ++x) - { - __global DATA_TYPE *input_address = - (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch); -#ifdef IN_PLACE - __global DATA_TYPE *output_address = input_address; -#else /* !IN_PLACE */ - __global DATA_TYPE *output_address = - (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch); -#endif /* IN_PLACE */ - *(output_address) = (*(input_address)-mean) * multip + beta; - } - } -#endif // defined(NHWC) -} -#endif /* defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \ - defined(DIM_Y) && defined(DIM_Z) */ diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl deleted file mode 100644 index 3943fc4c2..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#if defined(VEC_SIZE) && defined(DATA_TYPE) - -/** This performs to multiply input by scale_factor. - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. 
- * -DDATA_TYPE=float - * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @note The scale is read per row from the @p scale tensor and combined with the @p multiplier - * argument; the kernel body does not reference a -DSCALE define. - * - * @param[in] input_ptr Pointer to the source tensor. Supported data - * types: S32 (elements are read as int) - * @param[in] input_stride_x Stride of the source tensor in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[in] scale_ptr Pointer to the scale tensor. Supported data - * types: same as -DDATA_TYPE - * @param[in] scale_stride_x Stride of the scale tensor in X dimension (in - * bytes) - * @param[in] scale_step_x scale_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] scale_offset_first_element_in_bytes The offset of the first element in the scale - * tensor - * @param[out] output_ptr Pointer to the destination tensor.
Supported - * data types: F16/F32 - * @param[in] output_stride_x Stride of the destination tensor in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination tensor - * @param[in] multiplier Scalar factor multiplied into the per-row scale - * before it is applied to the input - */ -__kernel void multiply_scale_factor(IMAGE_DECLARATION(input), VECTOR_DECLARATION(scale), - IMAGE_DECLARATION(output), float multiplier) -{ - // Get pixels pointer - Image input = CONVERT_TO_IMAGE_STRUCT(input); - Image output = CONVERT_TO_IMAGE_STRUCT(output); - -#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X) - // Check if access on width gets out of bounds - // If it does shift access vector to access elements within bounds - const int xi = (int)(get_global_id(0) * VEC_SIZE); - input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x; - output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; - - // Load int data and convert it to DATA_TYPE - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - val = CONVERT(VLOAD(VEC_SIZE)(0, (__global int *)input.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); - - // Broadcast the scale entry selected by global id 1 to every vector lane - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - vscale = *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)); - - // Dequantize: val * (scale * multiplier) - vscale *= (DATA_TYPE)(multiplier); - val *= vscale; - - // Store result - VSTORE(VEC_SIZE) - (val, 0, (__global DATA_TYPE *)output.ptr); -#else // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X) - *((__global DATA_TYPE *)(output.ptr)) = - ((DATA_TYPE)(*((__global int *)(input.ptr)))) * - *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier); -#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) -} - -#endif // defined(VEC_SIZE) && defined(DATA_TYPE) diff --git
a/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl deleted file mode 100644 index 15c16f80c..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) -/** Performs a negation of input tensor. - * - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * - * @param[in] input_ptr Pointer to the source image. Supported data types: - * S16/S32/F16/F32. - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed - * per work item (in bytes) - * @param[in] input_offset_first_element_in_bytes Offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image.
Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in - * bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed - * per work item (in bytes) - * @param[in] output_offset_first_element_in_bytes Offset of the first element in the destination - * image - * - */ -__kernel void neg_tensor(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VSTORE(VEC_SIZE) - (-VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr), 0, (__global DATA_TYPE *)output.ptr); -} -#endif // defined(DATA_TYPE) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl deleted file mode 100644 index c274aba62..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 ARM Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(AXIS) && defined(DEPTH) && defined(OUTPUT_DIM_Z) - -/** Performs the OneHot operation along the chosen axis - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. - * -DDATA_TYPE=short - * @note Axis should be given as a preprocessor argument using -DAXIS=axis. e.g. -DAXIS=1 - * @attention Output tensor depth should be given as a preprocessor argument using - * -DOUTPUT_DIM_Z=size. e.g. -DOUTPUT_DIM_Z=16 - * @attention Input tensor depth should be given as a preprocessor argument using - * -DINPUT_DIM_Z=size. e.g. -DINPUT_DIM_Z=16 - * - * - * @param[in] indices_ptr Pointer to the source tensor. 
Supported data - * types: S32 - * @param[in] indices_stride_x Stride of the source tensor in X dimension - * (in bytes) - * @param[in] indices_step_x indices_stride_x * number of elements along - * X processed per work item (in bytes) - * @param[in] indices_stride_y Stride of the source tensor in Y dimension - * (in bytes) - * @param[in] indices_step_y indices_stride_y * number of elements along - * Y processed per work item (in bytes) - * @param[in] indices_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] indices_step_z indices_stride_z * number of elements along - * Z processed per work item (in bytes) - * @param[in] indices_offset_first_element_in_bytes Offset of the first element in the source - * tensor - * @param[in] on_value_ptr Pointer to the on_value vector. Supported - * data types: U8/S8/U16/S16/F16/U32/S32/F32. - * @param[in] on_value_stride_x Stride of the on_value vector in X dimension - * (in bytes) - * @param[in] on_value_step_x on_value_stride_x * number of elements along - * X processed per work item (in bytes) - * @param[in] on_value_offset_first_element_in_bytes Offset of the first element in the on_value - * vector - * @param[in] off_value_ptr Pointer to the off_value vector. Supported - * data types: Same as @p on_value. - * @param[in] off_value_stride_x Stride of the off_value vector in X - * dimension (in bytes) - * @param[in] off_value_step_x off_value_stride_x * number of elements - * along X processed per work item (in bytes) - * @param[in] off_value_offset_first_element_in_bytes Offset of the first element in the off_value - * vector - * @param[out] output_ptr Pointer to the destination tensor.
Supported - * data types: same as @p on_value - * @param[in] output_stride_x Stride of the destination tensor in X - * dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per work item (in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y - * dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per work item (in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z - * dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per work item (in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W - * dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per work item (in bytes) - * @param[in] output_offset_first_element_in_bytes Offset of the first element in the - * destination tensor - */
// Dense one-hot: every work item writes exactly one output element, choosing between
// the first element of on_value and the first element of off_value.
// AXIS selects which output coordinate is compared against the stored index; the
// other three coordinates are used to address the indices tensor.
__kernel void one_hot(TENSOR3D_DECLARATION(indices), VECTOR_DECLARATION(on_value),
                      VECTOR_DECLARATION(off_value), TENSOR4D_DECLARATION(output))
{
  // Global id 2 carries both the Z and W output coordinates, folded by OUTPUT_DIM_Z.
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2) % OUTPUT_DIM_Z;
  const int gw = get_global_id(2) / OUTPUT_DIM_Z;

  const Tensor3D idx_tensor = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(indices);
  Tensor4D dst_tensor = CONVERT_TO_TENSOR4D_STRUCT(output, OUTPUT_DIM_Z);

  // Typed views; only the selected value is dereferenced below.
  __global const DATA_TYPE *const on_src = (__global const DATA_TYPE *)on_value_ptr;
  __global const DATA_TYPE *const off_src = (__global const DATA_TYPE *)off_value_ptr;
  __global DATA_TYPE *const cell = (__global DATA_TYPE *)dst_tensor.ptr;

#if AXIS == 0
  const int hot = *(__global const int *)tensor3D_offset(&idx_tensor, gy, gz, gw);
  *cell = *((hot == gx) ? on_src : off_src);
#elif AXIS == 1
  const uint hot = *(__global const uint *)tensor3D_offset(&idx_tensor, gx, gz, gw);
  *cell = *((hot == gy) ? on_src : off_src);
#elif AXIS == 2
  const uint hot = *(__global const uint *)tensor3D_offset(&idx_tensor, gx, gy, gw);
  *cell = *((hot == gz) ? on_src : off_src);
#elif AXIS == 3
  const uint hot = *(__global const uint *)tensor3D_offset(&idx_tensor, gx, gy, gz);
  *cell = *((hot == gw) ? on_src : off_src);
#endif // AXIS
}

-/** Performs the OneHot operation along the chosen axis as off_value being zero - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. - * -DDATA_TYPE=short - * @note Axis should be given as a preprocessor argument using -DAXIS=axis. e.g. -DAXIS=1 - * @attention Output tensor depth should be given as a preprocessor argument using - * -DOUTPUT_DIM_Z=size. e.g. -DOUTPUT_DIM_Z=16 - * @attention Input tensor depth should be given as a preprocessor argument using - * -DINPUT_DIM_Z=size. e.g. -DINPUT_DIM_Z=16 - * - * - * @param[in] indices_ptr Pointer to the source tensor.
Supported data - * types: S32 - * @param[in] indices_stride_x Stride of the source tensor in X dimension - * (in bytes) - * @param[in] indices_step_x indices_stride_x * number of elements along - * X processed per work item (in bytes) - * @param[in] indices_stride_y Stride of the source tensor in Y dimension - * (in bytes) - * @param[in] indices_step_y indices_stride_y * number of elements along - * Y processed per work item (in bytes) - * @param[in] indices_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] indices_step_z indices_stride_z * number of elements along - * Z processed per work item (in bytes) - * @param[in] indices_offset_first_element_in_bytes Offset of the first element in the source - * tensor - * @param[in] on_value_ptr Pointer to the on_value vector. Supported - * data types: U8/S8/U16/S16/F16/U32/S32/F32. - * @param[in] on_value_stride_x Stride of the on_value vector in X dimension - * (in bytes) - * @param[in] on_value_step_x on_value_stride_x * number of elements along - * X processed per work item (in bytes) - * @param[in] on_value_offset_first_element_in_bytes Offset of the first element in the on_value - * vector - * @param[out] output_ptr Pointer to the destination tensor.
Supported - * data types: same as @p on_value - * @param[in] output_stride_x Stride of the destination tensor in X - * dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per work item (in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y - * dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per work item (in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z - * dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per work item (in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W - * dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per work item (in bytes) - * @param[in] output_offset_first_element_in_bytes Offset of the first element in the - * destination tensor - */
// Scatter variant of one-hot: one work item per element of the indices tensor.
// Only the "on" cell is written; every other output element is left untouched
// (presumably pre-filled by the caller -- TODO confirm against the host code).
__kernel void one_hot_only_on_value(TENSOR3D_DECLARATION(indices), VECTOR_DECLARATION(on_value),
                                    TENSOR4D_DECLARATION(output))
{
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);

  const Tensor3D idx_tensor = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(indices);
  const Tensor4D dst_tensor = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, OUTPUT_DIM_Z);

  const int hot = *(__global const int *)tensor3D_offset(&idx_tensor, gx, gy, gz);

  // Indices outside [0, DEPTH) produce no write at all.
  if (hot >= 0 && hot < DEPTH)
  {
    // AXIS decides at which of the four output coordinates the index is inserted.
#if AXIS == 0
    *(__global DATA_TYPE *)tensor4D_offset(&dst_tensor, hot, gx, gy, gz) =
      *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 1
    *(__global DATA_TYPE *)tensor4D_offset(&dst_tensor, gx, hot, gy, gz) =
      *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 2
    *(__global DATA_TYPE *)tensor4D_offset(&dst_tensor, gx, gy, hot, gz) =
      *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 3
    *(__global DATA_TYPE *)tensor4D_offset(&dst_tensor, gx, gy, gz, hot) =
      *((__global const DATA_TYPE *)on_value_ptr);
#endif // AXIS
  }
}

#endif // defined(DATA_TYPE) && defined(AXIS) && defined(DEPTH) && defined(OUTPUT_DIM_Z)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl deleted file mode 100644 index 76fda9041..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers_asymm.h" - -/* CONVERT_OP_FLOAT(x, type, round) expands to convert_<type>[_sat]<round>(x); the _sat form is selected when SATURATE is defined. The extra _STR level lets macro arguments expand before token pasting. */ -#ifdef SATURATE -#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x)) -#else /* SATURATE */ -#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x)) -#endif /* SATURATE */ -#define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round) - -#if defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT) -/** Performs a pixelwise multiplication used to quantize down the int32 accumulator values of - * GEMMLowp to QASYMM8 - * - * The following computations will be performed by the kernel: - * - * -# Add offset terms to inputs - * -# Multiply inputs - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Shift the int32 accumulator by result_shift - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. - * - * @attention The inputs and output data types need to be passed at compile time using - * -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar - * @attention The offset factor of inputs must be passed at compile time using -DIN1_OFFSET and - * -DIN2_OFFSET - * @attention The offset, scalar scale factor and number of bits to shift right of output tensor - * must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULT_INT and - * -DRESULT_SHIFT - * - * @param[in] in1_ptr Pointer to the source image.
Supported data types: - * U8 - * @param[in] in1_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed - * per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed - * per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source image in Z dimension (in - * bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Z processed - * per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] in2_ptr Pointer to the source image. Supported data types: - * U8 - * @param[in] in2_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed - * per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed - * per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source image in Z dimension (in - * bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Z processed - * per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image.
Supported data - * types: U8 - * @param[in] out_stride_x Stride of the destination image in X dimension (in - * bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed - * per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination image in Y dimension (in - * bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed - * per workitem(in bytes) - * @param[in] out_stride_z Stride of the destination image in Z dimension (in - * bytes) - * @param[in] out_step_z out_stride_z * number of elements along Z processed - * per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination - * image - * @param[in] scale Float scaling factor. Supported data types: F32 - */
// Quantized element-wise multiply, 16 elements per work item:
//   out = saturate_u8(requantize((in1 + IN1_OFFSET) * (in2 + IN2_OFFSET)) + RESULT_OFFSET)
// where requantize is ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE with
// RESULT_MULT_INT / RESULT_SHIFT.
// `scale` is part of the signature but is not referenced by the body.
__kernel void pixelwise_mul_qasymm8(TENSOR3D_DECLARATION(in1), TENSOR3D_DECLARATION(in2),
                                    TENSOR3D_DECLARATION(out), const float scale)
{
  // Get pixels pointer
  Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
  Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
  Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);

  // Load data and widen to int so the offsets and the product cannot wrap in 8 bits
  VEC_DATA_TYPE(int, 16)
  in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(int, 16));
  VEC_DATA_TYPE(int, 16)
  in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(int, 16));

  // Perform multiplication of two inputs
  VEC_DATA_TYPE(int, 16) in1_val = in1_data + (VEC_DATA_TYPE(int, 16))(IN1_OFFSET);
  VEC_DATA_TYPE(int, 16) in2_val = in2_data + (VEC_DATA_TYPE(int, 16))(IN2_OFFSET);
  VEC_DATA_TYPE(int, 16) out_val = in1_val * in2_val;

  // Multiply with a multiplier smaller than 1
  out_val =
    ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
  out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET);

  // Clamp to [0..255] as documented. A plain (non-saturating) conversion would keep
  // only the low 8 bits, so e.g. 256 would become 0 and -1 would become 255.
  VEC_DATA_TYPE(uchar, 16) res = convert_uchar16_sat(out_val);

  // TODO: Apply min-max BOUND to support fuse with relu.
  /*
  #if defined(MIN_BOUND)
    res = max(res, (uchar16)MIN_BOUND);
  #endif // defined(MIN_BOUND)
  #if defined(MAX_BOUND)
    res = min(res, (uchar16)MAX_BOUND);
  #endif // defined(MAX_BOUND)
  */

  // Store result
  VSTORE(16)(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
}
#endif // defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl deleted file mode 100644 index 4ae9adb0b..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -/* CONVERT_RTE / CONVERT_RTE_VEC expand to the convert_<type>[<size>]_rte(x) built-ins (round to nearest even); the _STR level lets macro arguments expand before token pasting. MIN_QUANT_VAL / MAX_QUANT_VAL are the symmetric quantized range [-127, 127]. */ -#define CONVERT_RTE(x, type) (convert_##type##_rte((x))) -#define CONVERT_RTE_VEC_STR(x, type, size) (convert_##type##size##_rte((x))) -#define CONVERT_RTE_VEC(x, type, size) CONVERT_RTE_VEC_STR(x, type, size) -#define MIN_QUANT_VAL -127 -#define MAX_QUANT_VAL 127 - -#if defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) - -/** This performs the quantization of floating point inputs to symmetric 8-bit signed integers. - * - * @note Input data type should be given as a preprocessor argument using -DDATA_TYPE_IN=type. e.g. - * -DDATA_TYPE_IN=float - * @note Output data type should be given as a preprocessor argument using -DDATA_TYPE_OUT=type. - * e.g. -DDATA_TYPE_OUT=char - * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16 - * @note Quantization scale should be given as a preprocessor argument using -DSCALE=scale. e.g. - * -DSCALE=0.125 - * @note Quantization offset should be given as a preprocessor argument using -DOFFSET=offset. e.g. - * -DOFFSET=125 - * @note Minimum value for quantized type should be given as a preprocessor argument using - * -DMIN_QUANT_VAL=value. e.g. -DMIN_QUANT_VAL=0 - * @note Maximum value for quantized type should be given as a preprocessor argument using - * -DMAX_QUANT_VAL=value. e.g. -DMAX_QUANT_VAL=255 - * - * @param[in] input_ptr Pointer to the source tensor. Supported data - * types: F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[out] output_ptr Pointer to the destination tensor. Supported - * data types: S8 - * @param[in] output_stride_x Stride of the destination tensor in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination tensor - * @param[in] scale_ptr Pointer to the scale tensor.
Supported data - * types: F32 - * @param[in] scale_stride_x Stride of the scale tensor in X dimension - * (in bytes) - * @param[in] scale_step_x scale_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] scale_offset_first_element_in_bytes The offset of the first element in the scale - * tensor - */
// Symmetric quantization: out = clamp(round_to_nearest_even(in / scale[row]),
// MIN_QUANT_VAL, MAX_QUANT_VAL), where row is global id 1.
// Note the argument order is (input, scale, output).
__kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(scale),
                                 IMAGE_DECLARATION(output))
{
  // Get pixels pointer
  Image input = CONVERT_TO_IMAGE_STRUCT(input);
  Image output = CONVERT_TO_IMAGE_STRUCT(output);

  // One scale per row. Address it through the vector's own first-element offset and
  // byte stride instead of assuming a tightly packed, zero-offset buffer; for such a
  // buffer this is the same address as ((DATA_TYPE_IN *)scale_ptr) + row.
  const DATA_TYPE_IN row_scale =
    *((__global DATA_TYPE_IN *)(scale_ptr + scale_offset_first_element_in_bytes +
                                get_global_id(1) * scale_stride_x));

#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
  // Check if access on width gets out of bounds
  // If it does shift access vector to access elements within bounds
  const int xi = (int)(get_global_id(0) * VEC_SIZE);
  const int overshoot = max(xi - (int)LAST_ACCESSED_X, 0);
  input.ptr -= overshoot * input_stride_x;
  output.ptr -= overshoot * output_stride_x;

  // Load data
  VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE)
  val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr);

  // Create scale vector (scalar is replicated across all lanes)
  const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) vscale = row_scale;

  // Quantize
  VEC_DATA_TYPE(int, VEC_SIZE)
  res = CLAMP(CONVERT_RTE_VEC(val / vscale, int, VEC_SIZE), MIN_QUANT_VAL, MAX_QUANT_VAL);

  // Store result
  VSTORE(VEC_SIZE)
  (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr);
#else //! defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
  // Scalar path: one element per work item
  *((__global DATA_TYPE_OUT *)(output.ptr)) = (DATA_TYPE_OUT)CLAMP(
    CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) / row_scale, int), MIN_QUANT_VAL,
    MAX_QUANT_VAL);
#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}
#endif // defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl deleted file mode 100644 index 832ac1270..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016, 2017 ARM Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) -/** Perform reduce max/min - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. - * -DDATA_TYPE=short - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. - * e.g. -DDEPTH_OUT=16 - * @attention Operation type(code) specifying which operation to perform should be passed as - * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input_ptr Pointer to the source image. 
Supported data - * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[in] input_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] input_step_w input_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination image.
Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - * @param[in] axis Axis through which reduction occurs - * @param[in] dim Dimension across the axis to be reduced. 
- */
// Min/max reduction along `axis`. The element at the work item's own coordinate
// seeds the result; positions 1 .. dim-1 along `axis` are then folded in with
// max (OP_CODE == 1) or min (OP_CODE == 2).
// NOTE(review): the input struct is built with a second macro argument of 0 while the
// output uses DEPTH_OUT -- confirm against helpers.h that this is intended.
__kernel void reduce_min_max(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output),
                             const int axis, const int dim)
{
  Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
  Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);

  // 4D coordinate of this work item; global id 2 carries Z and W folded by DEPTH_OUT.
  int coord[4] = {
    get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
    get_global_id(2) / DEPTH_OUT,
  };

  DATA_TYPE best =
    *((__global DATA_TYPE *)tensor4D_offset(&src, coord[0], coord[1], coord[2], coord[3]));

  for (int pos = 1; pos < dim; ++pos)
  {
    // Walk along the reduced axis, leaving the other three coordinates fixed.
    coord[axis] = pos;

#if OP_CODE == 1 || OP_CODE == 2
    const DATA_TYPE candidate =
      *((__global DATA_TYPE *)tensor4D_offset(&src, coord[0], coord[1], coord[2], coord[3]));
#endif

#if OP_CODE == 1 // REDUCE_MAX
    best = max(best, candidate);
#elif OP_CODE == 2 // REDUCE_MIN
    best = min(best, candidate);
#else // OP NOT SUPPORTED
    return;
#endif
  }

  *((__global DATA_TYPE *)dst.ptr) = best;
}

-/** Perform reduce sum/mean - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. - * -DDATA_TYPE=short - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. - * e.g. -DDEPTH_OUT=16 - * @attention Operation type(code) specifying which operation to perform should be passed as - * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input_ptr Pointer to the source image.
Supported data - * types: U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[in] input_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] input_step_w input_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination image.
Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - * @param[in] axis Axis through which reduction occurs - * @param[in] dim Dimension across the axis to be reduced. 
- */
// Sum/mean reduction along `axis`: accumulates `dim` elements in DATA_TYPE and stores
// either the sum (OP_CODE == 3) or the sum divided by dim (OP_CODE == 4). Any other
// OP_CODE stores nothing.
__kernel void reduce_sum_mean(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output),
                              const int axis, const int dim)
{
  Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
  Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);

  // 4D coordinate of this work item; global id 2 carries Z and W folded by DEPTH_OUT.
  int coord[4] = {
    get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
    get_global_id(2) / DEPTH_OUT,
  };

  DATA_TYPE acc = (DATA_TYPE)0;
  for (int pos = 0; pos < dim; ++pos)
  {
    // Walk along the reduced axis, leaving the other three coordinates fixed.
    coord[axis] = pos;
    acc +=
      *((__global DATA_TYPE *)tensor4D_offset(&src, coord[0], coord[1], coord[2], coord[3]));
  }

#if OP_CODE == 3 // REDUCE_SUM
  *((__global DATA_TYPE *)dst.ptr) = acc;
#elif OP_CODE == 4 // REDUCE_MEAN
  *((__global DATA_TYPE *)dst.ptr) = acc / CONVERT(dim, DATA_TYPE);
#else // OP NOT SUPPORTED
  return;
#endif
}
#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl deleted file mode 100644 index 3d5e90356..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "helpers.h"

#if defined(WIDTH)
/** Computes a symmetric 8-bit quantization scale factor from WIDTH consecutive F32 values.
 *
 * Each work-item scans WIDTH floats starting at its source position, finds their minimum and
 * maximum, and writes max(|min|, |max|) / 127 to the destination vector at index
 * get_global_id(1). The min and max themselves are NOT stored.
 *
 * @note Only the width is read by this kernel; it must be provided at compile time using
 * -DWIDTH (e.g. -DWIDTH=320). The kernel is compiled out entirely when WIDTH is undefined.
 * @note NOTE(review): the destination index is get_global_id(1), so this presumably produces
 * one scale factor per input row - confirm against the host-side window configuration.
 *
 * @param[in]  src_ptr                           Pointer to the source tensor. Supported data types:
 *                                               F32
 * @param[in]  src_stride_x                      Stride of the source image in X dimension (in bytes)
 * @param[in]  src_step_x                        src_stride_x * number of elements along X processed
 *                                               per workitem(in bytes)
 * @param[in]  src_stride_y                      Stride of the source image in Y dimension (in bytes)
 * @param[in]  src_step_y                        src_stride_y * number of elements along Y processed
 *                                               per workitem(in bytes)
 * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source image
 * @param[out] dst_ptr                           Pointer to the scale factor vector. Supported data
 *                                               types: F32.
 * @param[in]  dst_stride_x                      Stride of the scale factor vector in X dimension (in
 *                                               bytes)
 * @param[in]  dst_step_x                        dst_stride_x * number of elements along X processed
 *                                               per workitem(in bytes)
 * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the scale factor
 *                                               vector
 */
__kernel void scale_factor_symm8(IMAGE_DECLARATION(src), VECTOR_DECLARATION(dst))
{
  Image src = CONVERT_TO_IMAGE_STRUCT(src);

  // Four-lane running min/max, seeded so that any real input replaces the seed.
  float4 min_value = (float4)FLT_MAX;
  float4 max_value = (float4)-FLT_MAX;

  int x = 0;
  __global float *src_addr = (__global float *)(src.ptr);

  // Main loop: consume 8 floats per iteration, folding each half into the 4 lanes.
  for (; x <= (int)(WIDTH - 8); x += 8)
  {
    float8 value = vload8(0, (__global float *)(src_addr + x));

    // select(a, b, c) picks b where c is true: keep the running value when it already wins.
    min_value = select(value.s0123, min_value, min_value < value.s0123);
    min_value = select(value.s4567, min_value, min_value < value.s4567);

    max_value = select(value.s0123, max_value, max_value > value.s0123);
    max_value = select(value.s4567, max_value, max_value > value.s4567);
  }

  // Tail loop: the remaining (WIDTH % 8) elements are folded into lane 0 only.
  for (; x < WIDTH; ++x)
  {
    float value = *(src_addr + x);

    min_value.s0 = min(min_value.s0, value);
    max_value.s0 = max(max_value.s0, value);
  }

  // Perform min/max reduction across the four lanes; the result ends up in lane 0
  min_value.s01 = min(min_value.s01, min_value.s23);
  min_value.s0 = min(min_value.s0, min_value.s1);
  max_value.s01 = max(max_value.s01, max_value.s23);
  max_value.s0 = max(max_value.s0, max_value.s1);

  // Extract scale: largest absolute value mapped onto the symmetric int8 limit 127
  max_value.s0 = max(fabs(min_value.s0), fabs(max_value.s0)) / 127.0f;

  // Store the scale factor (not the min/max) at this work-item's slot
  *((__global float *)(dst_ptr) + get_global_id(1)) = max_value.s0;
}
#endif // defined(WIDTH)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
deleted file mode 100644
index 3eb1a4ce7..000000000
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
+++ /dev/null
@@ -1,121 +0,0 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -__kernel void topkv2_init(VECTOR_DECLARATION(input), __global float *in_key_buf, - __global int *in_ind_buf, const int n) -{ - int gid = get_global_id(0); - int lws = get_local_size(0); - int groups = get_num_groups(0); - int gws = lws * groups; - int iter = n / gws; - - Vector input = CONVERT_TO_VECTOR_STRUCT_NO_STEP(input); - - for (int i = 0; i < iter; ++i) - { - int idx = i * gws + gid; - in_key_buf[idx] = *(__global float *)(input.ptr + idx * input.stride_x); - in_ind_buf[idx] = idx; - } -} - -__kernel void topkv2_find_first_negative(__global float *out_key_buf, - __global int *first_negative_idx, int n) -{ - int gid = get_global_id(0); - - if (gid == n - 1) - { - // if the last item is positive, the first negative index is n. - if (out_key_buf[gid] > 0.f) - *first_negative_idx = n; - } - else if (gid == 0) - { - // if the first item is negative, set it 0. - if (out_key_buf[gid] < 0.f) - *first_negative_idx = 0; - } - else - { - // if its left is positive and it is negative, then it is the first negative item. 
- if (out_key_buf[gid - 1] > 0.f && out_key_buf[gid] < 0.f) - *first_negative_idx = gid; - } -} - -__kernel void topkv2_reorder_negatives(__global float *in_key_buf, __global float *out_key_buf, - __global float *in_ind_buf, __global float *out_ind_buf, - __global int *first_negative_idx, int n) -{ - int gid = get_global_id(0); - - int num_negs = n - *first_negative_idx; - int in_idx; - - if (gid < num_negs) - { - in_idx = n - 1 - gid; - } - else - { - in_idx = gid - num_negs; - } - - out_key_buf[gid] = in_key_buf[in_idx]; - out_ind_buf[gid] = in_ind_buf[in_idx]; -} - -__kernel void topkv2_store(VECTOR_DECLARATION(values), VECTOR_DECLARATION(indices), - __global float *out_key_buf, __global int *out_ind_buf, int n) -{ - int gid = get_global_id(0); - - Vector values = CONVERT_TO_VECTOR_STRUCT_NO_STEP(values); - Vector indices = CONVERT_TO_VECTOR_STRUCT_NO_STEP(indices); - - int idx = n - 1 - gid; - - *(__global float *)(values.ptr + gid * values.stride_x) = out_key_buf[idx]; - *(__global int *)(indices.ptr + gid * indices.stride_x) = out_ind_buf[idx]; -} diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl deleted file mode 100644 index 460de790b..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "helpers.h" - -__global inline float *get_vec_elem(Vector *vec, int idx) -{ - return (__global float *)(vec->ptr + idx * vec->stride_x); -} - -__global inline int *get_vec_elem_int(Vector *vec, int idx) -{ - return (__global int *)(vec->ptr + idx * vec->stride_x); -} - -// A utility function to swap two elements -void swap(__global float *a, __global float *b) -{ - float t = *a; - *a = *b; - *b = t; -} - -void swap_idx(__global int *a, __global int *b) -{ - int t = *a; - *a = *b; - *b = t; -} - -/* This function is same in both iterative and recursive*/ -int partition(Vector *arr, __global int *indices, int l, int h) -{ - float x = *get_vec_elem(arr, h); - int i = (l - 1); - - for (int j = l; j <= h - 1; j++) - { - if (*get_vec_elem(arr, j) >= x) - { - i++; - swap(get_vec_elem(arr, i), get_vec_elem(arr, j)); - swap_idx(&indices[i], &indices[j]); - } - } - swap(get_vec_elem(arr, i + 1), get_vec_elem(arr, h)); - swap_idx(&indices[i + 1], &indices[h]); - return (i + 1); -} - -/* A[] --> Array to be sorted, - l --> Starting index, - h --> Ending index */ -void quickSortIterative(Vector *arr, __global int *indices, __global int *stack, int l, int h) -{ - // Create an auxiliary stack - - // initialize top of stack - int top = -1; - - // push initial values of l and h to stack - stack[++top] = l; - stack[++top] = h; - - // Keep popping from stack while is not empty - while (top >= 0) - { - // Pop h and l - h = stack[top--]; - l = stack[top--]; - - // Set pivot element at its correct position - // in sorted array - int p = partition(arr, indices, l, h); - - // If there are elements on left side of pivot, - // then push left side to stack - if (p - 1 > l) - { - stack[++top] = l; - stack[++top] = p - 1; - } - - // If there are elements on right side of pivot, - // then push right side to stack - if (p + 1 < h) - { - stack[++top] = p + 1; - stack[++top] = h; - } - } -} - -__kernel void topkv2_quicksort(VECTOR_DECLARATION(input), 
VECTOR_DECLARATION(topk_values), - VECTOR_DECLARATION(topk_indices), __global int *indices, - __global int *temp_stack, int k, int n) -{ - Vector input = CONVERT_TO_VECTOR_STRUCT_NO_STEP(input); - Vector topk_values = CONVERT_TO_VECTOR_STRUCT_NO_STEP(topk_values); - Vector topk_indices = CONVERT_TO_VECTOR_STRUCT_NO_STEP(topk_indices); - - for (int i = 0; i < n; ++i) - { - indices[i] = i; - } - - quickSortIterative(&input, indices, temp_stack, 0, n - 1); - - // extract k items. - for (int i = 0; i < k; ++i) - { - *get_vec_elem(&topk_values, i) = *get_vec_elem(&input, i); - *get_vec_elem_int(&topk_indices, i) = indices[i]; - } -} diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl deleted file mode 100644 index e9d4696b4..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// reference: -// https://code.google.com/archive/p/ocl-radix-sort/source/default/source -// OpenCL kernel sources for the CLRadixSort class -// the #include does not exist in OpenCL -// Copyright Philippe Helluy, Université de Strasbourg, France, 2011, helluy@math.unistra.fr -// licensed under the GNU Lesser General Public License see http://www.gnu.org/copyleft/lesser.html -// if you find this software usefull you can cite the following work in your reports or articles: -// Philippe HELLUY, A portable implementation of the radix sort algorithm in OpenCL, 2011. 
// http://hal.archives-ouvertes.fr/hal-00596730

// Reference for floating point radix sort:
// http://www.codercorner.com/RadixSortRevisited.htm

// Compute the histogram for each radix and each virtual processor for the pass.
//
// in_key_buf   : keys, read as raw 32-bit patterns
// d_Histograms : output counts, laid out as [radix][group][work-item]
// pass         : which group of _BITS bits of the key is examined
// loc_histo    : local scratch of _RADIX * local_size ints, laid out as [radix][work-item]
// n            : number of keys; each work-item handles n / groups / items of them
//                (integer division - keys beyond a multiple of the launch size are not visited)
__kernel void radixsort_histogram(__global float *in_key_buf, __global int *d_Histograms,
                                  const int pass, __local int *loc_histo, const int n)
{
  int it = get_local_id(0);  // i local number of the processor
  int ig = get_global_id(0); // global number = i + g I

  int gr = get_group_id(0); // g group number

  int groups = get_num_groups(0);
  int items = get_local_size(0);

  // set the local histograms to zero
  for (int ir = 0; ir < _RADIX; ir++)
  {
    loc_histo[ir * items + it] = 0;
  }

  barrier(CLK_LOCAL_MEM_FENCE);

  // range of keys that are analyzed by the work item
  int size = n / groups / items; // size of the sub-list
  int start = ig * size;         // beginning of the sub-list

  unsigned int key;
  int shortkey, k;

  // compute the index
  // the computation depends on the transposition
  for (int j = 0; j < size; j++)
  {
#ifdef TRANSPOSE
    // transposed layout: consecutive work-items read consecutive keys
    k = groups * items * j + ig;
#else
    // plain layout: each work-item reads its own contiguous sub-list
    k = j + start;
#endif

    // reinterpret the float key as its unsigned bit pattern
    key = *((__global unsigned int *)(in_key_buf + k));

    // extract the group of _BITS bits of the pass
    // the result is in the range 0.._RADIX-1
    shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1));

    // increment the local histogram
    loc_histo[shortkey * items + it]++;
  }

  barrier(CLK_LOCAL_MEM_FENCE);

  // copy the local histogram to the global one
  for (int ir = 0; ir < _RADIX; ir++)
  {
    d_Histograms[items * (ir * groups + gr) + it] = loc_histo[ir * items + it];
  }

  // NOTE(review): barrier() only synchronizes work-items of the same work-group.
  barrier(CLK_GLOBAL_MEM_FENCE);
}

// Initial transpose of the list for improving
// coalescent memory access.
//
// invect / outvect   : source matrix (nbrow x nbcol, row-major) and its transpose
// inperm / outperm   : permutation array transposed alongside, only when PERMUT is defined
// blockmat/blockperm : local tiles of tilesize * tilesize ints
// Each work-item handles `tilesize` rows starting at get_global_id(0) * tilesize for the
// column get_global_id(1).
__kernel void transpose(const __global int *invect, __global int *outvect, const int nbcol,
                        const int nbrow, const __global int *inperm, __global int *outperm,
                        __local int *blockmat, __local int *blockperm, const int tilesize)
{

  int i0 = get_global_id(0) * tilesize; // first row index
  int j = get_global_id(1);             // column index

  int jloc = get_local_id(1); // local column index

  // fill the cache
  for (int iloc = 0; iloc < tilesize; iloc++)
  {
    int k = (i0 + iloc) * nbcol + j; // position in the matrix
    blockmat[iloc * tilesize + jloc] = invect[k];
#ifdef PERMUT
    blockperm[iloc * tilesize + jloc] = inperm[k];
#endif
  }

  barrier(CLK_LOCAL_MEM_FENCE);

  // first row index in the transpose
  int j0 = get_group_id(1) * tilesize;

  // write the cached tile to its transposed position
  for (int iloc = 0; iloc < tilesize; iloc++)
  {
    int kt = (j0 + iloc) * nbrow + i0 + jloc; // position in the transpose
    outvect[kt] = blockmat[jloc * tilesize + iloc];
#ifdef PERMUT
    outperm[kt] = blockperm[jloc * tilesize + iloc];
#endif
  }
}

// Each virtual processor reorders its data using the scanned histogram.
//
// in_key / out_key         : keys before / after this pass
// d_Histograms             : scanned histogram, i.e. the first output position for every
//                            (radix, group, work-item) triple
// pass                     : which group of _BITS bits of the key is examined
// indices_in / indices_out : permutation moved alongside the keys, only when PERMUT is defined
// loc_histo                : local scratch of _RADIX * local_size ints
// n                        : number of keys
__kernel void radixsort_reorder(__global float *in_key, __global float *out_key,
                                __global int *d_Histograms, const int pass,
                                __global int *indices_in, __global int *indices_out,
                                __local int *loc_histo, const int n)
{

  int it = get_local_id(0);
  int ig = get_global_id(0);

  int gr = get_group_id(0);
  int groups = get_num_groups(0);
  int items = get_local_size(0);

  int start = ig * (n / groups / items);
  int size = n / groups / items;

  // take the histogram in the cache
  for (int ir = 0; ir < _RADIX; ir++)
  {
    loc_histo[ir * items + it] = d_Histograms[items * (ir * groups + gr) + it];
  }
  barrier(CLK_LOCAL_MEM_FENCE);

  int newpos, shortkey, k, newpost;
  unsigned int key;

  for (int j = 0; j < size; j++)
  {
#ifdef TRANSPOSE
    k = groups * items * j + ig;
#else
    k = j + start;
#endif
    // keep the float value for the store; the bit pattern is used only to pick the bucket
    float org_value = in_key[k];
    key = *(__global unsigned int *)(in_key + k);
    shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1));

    // next free output slot for this work-item's bucket
    newpos = loc_histo[shortkey * items + it];

#ifdef TRANSPOSE
    // convert the linear position into the transposed layout
    int ignew, jnew;
    ignew = newpos / (n / groups / items);
    jnew = newpos % (n / groups / items);
    newpost = jnew * (groups * items) + ignew;
#else
    newpost = newpos;
#endif

    // d_outKeys[newpost]= key; // killing line !!!
    out_key[newpost] = org_value;

#ifdef PERMUT
    indices_out[newpost] = indices_in[k];
#endif

    // advance the bucket cursor for the next key with the same digit
    newpos++;
    loc_histo[shortkey * items + it] = newpos;
  }
}

// Perform a parallel prefix sum (a scan) on the local histograms
// (see Blelloch 1990); each work-item handles two entries.
// see also http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html
//
// histo   : scanned in place; each work-group processes 2 * local_size consecutive entries and
//           ends up with their exclusive prefix sum
// temp    : local scratch of 2 * local_size ints
// globsum : globsum[group] receives the total of that group's entries
__kernel void radixsort_scanhistograms(__global int *histo, __local int *temp,
                                       __global int *globsum)
{
  int it = get_local_id(0);
  int ig = get_global_id(0);
  int decale = 1; // current stride between the pairs being combined ("decale" = offset)
  int n = get_local_size(0) * 2;
  int gr = get_group_id(0);

  // load input into local memory
  // up sweep phase
  temp[2 * it] = histo[2 * ig];
  temp[2 * it + 1] = histo[2 * ig + 1];

  // parallel prefix sum (algorithm of Blelloch 1990)
  for (int d = n >> 1; d > 0; d >>= 1)
  {
    barrier(CLK_LOCAL_MEM_FENCE);
    if (it < d)
    {
      int ai = decale * (2 * it + 1) - 1;
      int bi = decale * (2 * it + 2) - 1;
      temp[bi] += temp[ai];
    }
    decale *= 2;
  }

  // store the last element in the global sum vector
  // (maybe used in the next step for constructing the global scan)
  // clear the last element
  if (it == 0)
  {
    globsum[gr] = temp[n - 1];
    temp[n - 1] = 0;
  }

  // down sweep phase
  for (int d = 1; d < n; d *= 2)
  {
    decale >>= 1;
    barrier(CLK_LOCAL_MEM_FENCE);

    if (it < d)
    {
      int ai = decale * (2 * it + 1) - 1;
      int bi = decale * (2 * it + 2) - 1;

      int t = temp[ai];
      temp[ai] = temp[bi];
      temp[bi] += t;
    }
  }
  barrier(CLK_LOCAL_MEM_FENCE);

  // write results to device memory

  histo[2 * ig] = temp[2 * it];
  histo[2 * ig + 1] = temp[2 * it + 1];

  barrier(CLK_GLOBAL_MEM_FENCE);
}

// Use the global sum for updating the local histograms:
// each work item adds its group's entry of globsum to two values of histo.
__kernel void radixsort_pastehistograms(__global int *histo, __global int *globsum)
{
  int ig = get_global_id(0);
  int gr = get_group_id(0);

  int s;

  s = globsum[gr];

  // write results to device memory
  histo[2 * ig] += s;
  histo[2 * ig + 1] += s;

  barrier(CLK_GLOBAL_MEM_FENCE);
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp
deleted file mode 100644
index 047004d5e..000000000
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp
+++ /dev/null
@@ -1,329 +0,0 @@
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Copyright (c) 2019-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"

#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include "support/StringSupport.h"

namespace arm_compute
{
namespace
{
// Number of elements used as the X step of the execution window and of the horizontal
// access windows below.
constexpr unsigned int vector_size = 16;

/** Checks that the tensor metadata and reduction parameters are acceptable for this kernel.
 *
 * @param[in] input       Source tensor info; must be single-channel QASYMM8, S32, F16 or F32.
 * @param[in] prev_output Optional info of a previous partial result (may be nullptr).
 * @param[in] output      Destination tensor info; if already initialised it must be U32, S32
 *                        or S64 and match the data type of an initialised @p prev_output.
 * @param[in] axis        Reduction axis; must be in [0, 3].
 * @param[in] op          Must be ARG_IDX_MAX or ARG_IDX_MIN.
 *
 * @return An empty Status on success, otherwise the first failed check.
 */
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *prev_output,
                          const ITensorInfo *output, unsigned int axis, ReductionOperation op)
{
  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
  ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32,
                                                       DataType::F16, DataType::F32);
  ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX &&
                                    op != ReductionOperation::ARG_IDX_MIN,
                                  "Only ARG_IDX_MAX and ARG_IDX_MIN are supported");
  ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions,
                                  "Reduction axis greater than max number of dimensions");
  ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");

  // An output with total_size() == 0 has not been initialised yet and is not type-checked.
  if (output->total_size() != 0)
  {
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32,
                                                         DataType::S64);
  }
  if (prev_output != nullptr && prev_output->total_size() != 0)
  {
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(prev_output, 1, DataType::U32,
                                                         DataType::S32, DataType::S64);
    if (output->total_size() != 0)
    {
      ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(prev_output, output);
    }
  }

  return Status{};
}

/** Auto-initialises the output info and computes the execution window for the given axis.
 *
 * The output shape is the input shape with dimension @p axis set to 1; its data type is taken
 * from @p prev_output when present and is S32 otherwise.
 *
 * @return A tuple of (Status, Window). The status is a runtime error ("Insufficient Padding!")
 *         when update_window_and_padding() reports that the window had to change.
 */
std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input,
                                                         ITensorInfo *prev_output,
                                                         ITensorInfo *output, unsigned int axis,
                                                         ReductionOperation op)
{
  ARM_COMPUTE_UNUSED(op);
  // Output tensor auto initialization if not yet initialized
  TensorShape output_shape{input->tensor_shape()};
  output_shape.set(axis, 1);
  DataType output_data_type = (prev_output != nullptr) ? (prev_output->data_type()) : DataType::S32;
  auto_init_if_empty(*output, input->clone()
                                ->set_tensor_shape(output_shape)
                                .set_data_type(output_data_type)
                                .reset_padding()
                                .set_is_resizable(true));

  // The window is built over prev_output when it exists, otherwise over the input.
  Window win = calculate_max_window((prev_output != nullptr) ? (*prev_output) : (*input),
                                    Steps(vector_size));
  bool window_changed = false;

  switch (axis)
  {
    case 0:
    {
      // Reduction along X: static access over the full row of whichever tensor drives the
      // window, and a single output element per step.
      ITensorInfo *input_tensor_access = prev_output != nullptr ? prev_output : input;
      AccessWindowStatic input_access(input_tensor_access, 0, 0,
                                      static_cast<int>(input_tensor_access->dimension(0)), 1);
      AccessWindowHorizontal output_access(output, 0, 1);
      window_changed = update_window_and_padding(win, input_access, output_access);
      output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
    }
    break;
    case 1:
    case 2:
    case 3:
    {
      // Reduction along Y/Z/W: input and output are both accessed vector_size elements at a
      // time along X.
      AccessWindowHorizontal input_access(input, 0, vector_size);
      AccessWindowHorizontal output_access(output, 0, vector_size);
      window_changed = update_window_and_padding(win, input_access, output_access);
      output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
    }
    break;
    default:
      ARM_COMPUTE_ERROR("Not supported");
  }

  Status err = (window_changed)
                 ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
                 : Status{};
  return std::make_tuple(err, win);
}
} // namespace

/** Creates an unconfigured kernel; configure() must be called before run(). */
CLArgMinMaxLayerKernelEx::CLArgMinMaxLayerKernelEx()
  : _input(nullptr), _prev_output(nullptr), _output(nullptr), _reduction_axis(0),
    _op(ReductionOperation::ARG_IDX_MAX)
{
}

/** Validates the arguments, builds the OpenCL kernel and sets up its execution window.
 *
 * @param[in]  input       Source tensor.
 * @param[in]  prev_output Optional previous partial result (may be nullptr).
 * @param[out] output      Destination tensor; its info is auto-initialised if empty.
 * @param[in]  axis        Reduction axis in [0, 3]; selects kernel "arg_min_max_ex_{x,y,z,w}".
 * @param[in]  op          ARG_IDX_MAX or ARG_IDX_MIN.
 */
void CLArgMinMaxLayerKernelEx::configure(const ICLTensor *input, const ICLTensor *prev_output,
                                         ICLTensor *output, unsigned int axis,
                                         ReductionOperation op)
{
  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
  ARM_COMPUTE_ERROR_THROW_ON(
    validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr,
                       output->info(), axis, op));
  auto win_config = validate_and_configure_window(
    input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis,
    op);
  ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));

  _input = input;
  _prev_output = prev_output;
  _output = output;
  _reduction_axis = axis;
  _op = op;

  // Set build options
  CLBuildOptions build_opts;

  build_opts.add_option_if(_prev_output != nullptr, "-DPREV_OUTPUT");
  build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
  build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DFLOAT_DATA_TYPE");
  build_opts.add_option_if_else(op == ReductionOperation::ARG_IDX_MAX, "-DARG_MAX", "-DARG_MIN");
  build_opts.add_option("-DDATA_TYPE_OUTPUT=" +
                        get_cl_type_from_data_type(output->info()->data_type()));
  build_opts.add_option("-DDATA_TYPE_SELECT=" +
                        get_cl_signed_type_from_element_size(input->info()->element_size()));

  // Create kernel
  cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange();
  std::string kernel_axis_name;
  switch (axis)
  {
    case 0:
    {
      // The width comes from prev_output when present; only this axis overrides the default
      // local work size hint.
      const ICLTensor *input_for_width = prev_output != nullptr ? _prev_output : _input;
      build_opts.add_option("-DWIDTH=" +
                            support::cpp11::to_string(input_for_width->info()->dimension(0)));

      kernel_axis_name = "x";
      lws_hint = create_lws_hint_parallel_implementations(input_for_width->info()->dimension(0),
                                                          vector_size);
    }
    break;
    case 1:
      build_opts.add_option("-DHEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
      kernel_axis_name = "y";
      break;
    case 2:
      build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
      kernel_axis_name = "z";
      break;
    case 3:
      // The W kernel is given both the depth and the batch extent.
      build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
      build_opts.add_option("-DBATCH=" + support::cpp11::to_string(input->info()->dimension(3)));
      kernel_axis_name = "w";
      break;
    default:
      ARM_COMPUTE_ERROR("Not supported");
  }
  _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(
    "arg_min_max_ex_" + kernel_axis_name, build_opts.options()));

  // Configure kernel window
  ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
}

/** Static check that configure() would succeed with the given tensor infos.
 *
 * The window configuration is exercised on clones, so the caller's infos are not modified.
 *
 * @return An empty Status on success, otherwise the first error found.
 */
Status CLArgMinMaxLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *prev_output,
                                          const ITensorInfo *output, unsigned int axis,
                                          ReductionOperation op)
{
  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, prev_output, output, axis, op));
  ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
    input->clone().get(), (prev_output != nullptr) ? prev_output->clone().get() : nullptr,
    output->clone().get(), axis, op)));
  return Status{};
}

/** Enqueues the configured kernel over @p window, slicing it according to the reduction axis.
 *
 * @param[in]     window Sub-window of the configured window to execute.
 * @param[in,out] queue  Command queue the kernel is enqueued on.
 */
void CLArgMinMaxLayerKernelEx::run(const Window &window, cl::CommandQueue &queue)
{
  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);

  switch (_reduction_axis)
  {
    case 0:
    {
      // Set out window
      Window out_window(window);
      out_window.set(Window::DimX, Window::Dimension(0, 0, 0));

      // Get first input and output slices
      Window in_slice = window.first_slice_window_2D();
      Window out_slice = out_window.first_slice_window_2D();

      // Number of 2D tensor arguments that precede the local buffer argument
      const unsigned int num_tensors = _prev_output != nullptr ? 3 : 2;

      // Set local results buffer: one output-sized element per work-item of the work-group.
      // It is passed as the kernel argument right after the tensor arguments.
      unsigned int local_res_size = lws_hint()[0] * _output->info()->element_size();
      _kernel.setArg(num_arguments_per_2D_tensor() * num_tensors, local_res_size, nullptr);
      do
      {
        unsigned int idx = 0;
        add_2D_tensor_argument(idx, _input, in_slice);
        if (_prev_output != nullptr)
        {
          add_2D_tensor_argument(idx, _prev_output, in_slice);
        }
        add_2D_tensor_argument(idx, _output, out_slice);
        enqueue(queue, *this, in_slice, lws_hint());
      } while (window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
    }
    break;
    case 1:
    {
      // Get first input and output slices; the input window spans the whole Y extent in a
      // single step
      Window window_in{window};
      window_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1),
                                                    _input->info()->dimension(1)));
      Window in_slice = window_in.first_slice_window_2D();
      Window out_slice = window.first_slice_window_2D();

      do
      {
        unsigned int idx = 0;
        add_2D_tensor_argument(idx, _input, in_slice);
        add_2D_tensor_argument(idx, _output, out_slice);
        enqueue(queue, *this, in_slice, lws_hint());
      } while (window_in.slide_window_slice_2D(in_slice) &&
               window.slide_window_slice_2D(out_slice));
    }
    break;
    case 2:
    {
      // Get first input and output slices; the input window spans the whole Z extent in a
      // single step
      Window window_in{window};
      window_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2),
                                                    _input->info()->dimension(2)));
      Window in_slice = window_in.first_slice_window_3D();
      Window out_slice = window.first_slice_window_3D();

      do
      {
        unsigned int idx = 0;
        add_3D_tensor_argument(idx, _input, in_slice);
        add_3D_tensor_argument(idx, _output, out_slice);
        enqueue(queue, *this, in_slice, lws_hint());
      } while (window_in.slide_window_slice_3D(in_slice) &&
               window.slide_window_slice_3D(out_slice));
    }
    break;
    case 3:
    {
      // Get first input and output slices; dimension 3 of the input window is collapsed to a
      // single step
      Window window_in{window};
      window_in.set(3, Window::Dimension(0, 1, 1));
      Window in_slice = window_in.first_slice_window_4D();
      Window out_slice = window.first_slice_window_4D();

      do
      {
        unsigned int idx = 0;
        add_4D_tensor_argument(idx, _input, in_slice);
        add_4D_tensor_argument(idx, _output, out_slice);
        enqueue(queue, *this, in_slice, lws_hint());
      } while (window_in.slide_window_slice_4D(in_slice) &&
               window.slide_window_slice_4D(out_slice));
    }
    break;
    default:
      ARM_COMPUTE_ERROR("Not supported");
  }
}
} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
deleted file mode 100644
index fbc76f5e1..000000000
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output) -{ - const TensorShape &out_shape = - TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, - DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - return Status{}; -} -} // namespace - -CLBinaryLogicalOpKernel::CLBinaryLogicalOpKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor *input2, - ICLTensor *output, BinaryLogicalOperation op) -{ - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_parameters(input1->info(), input2->info(), output->info())); - - _input1 = input1; - _input2 = input2; - _output = output; - - // Create kernel - std::string kernel_name = "binary_logical_op"; - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + 
get_cl_type_from_data_type(input1->info()->data_type()))); - - int op_code = 0; - switch (op) - { - case BinaryLogicalOperation::AND: - op_code = 1; - break; - case BinaryLogicalOperation::OR: - op_code = 2; - break; - default: - throw std::runtime_error("Operation not supported, yet"); - } - - build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info()); - - const ValidRegion &valid_region = broadcast_pair.second; - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input1->info()); - Window win_input2 = win.broadcast_if_dimension_le_one(*input2->info()); - - AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); -} - -void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input1->info()->tensor_shape(); - const TensorShape &in_shape2 = _input2->info()->tensor_shape(); - const TensorShape &out_shape = _output->info()->tensor_shape(); - - bool 
can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input1, slice_input1); - add_3D_tensor_argument(idx, _input2, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLBinaryLogicalOpKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); - const unsigned int border = - std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp deleted file mode 100644 index 6e0bcde7f..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright 
(c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/CL/kernels/CLCastBoolKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/CLValidate.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "support/StringSupport.h" - -#include <cstddef> -#include <set> -#include <string> - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output); - ARM_COMPUTE_RETURN_ERROR_ON(input == output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8, - DataType::S16, DataType::U16, DataType::U32, - DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == output->data_type(), - "Input and output data types must be different"); - - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} -} // namespace - -void CLCastBoolKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype - // must be given) - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - // Get number of elements to process per iterations - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Set build options - CLBuildOptions build_opts; - build_opts.add_option("-DVEC_SIZE=" + - 
support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option("-DDATA_TYPE_OUT=" + - get_cl_type_from_data_type(output->info()->data_type())); - - // Create kernel - const std::string kernel_name = "cast_bool"; - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options())); - - // Configure kernel - ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); - - // Collapse window - const Window &full_window = window(); - Window collapsed_window = full_window.collapse_if_possible(full_window, Window::DimZ); - ICLKernel::configure_internal(collapsed_window); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(output->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); -} - -Status CLCastBoolKernel::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - - return Status{}; -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp deleted file mode 100644 index 67aaf2db6..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - input_access.set_valid_region(win, output->valid_region()); - - Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLEmbeddingLookupKernel::CLEmbeddingLookupKernel() - : _input(nullptr), _output(nullptr), _lookups(nullptr) -{ -} - -Status CLEmbeddingLookupKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *lookups) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1); - - return Status{}; -} - -void CLEmbeddingLookupKernel::configure(const ICLTensor *input, ICLTensor *output, - const ICLTensor *lookups) -{ - 
ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), lookups->info())); - - _input = input; - _output = output; - _lookups = lookups; - - // Set kernel build options - std::stringstream kernel_name; - std::set<std::string> build_opts; - kernel_name << "embedding_lookup"; - - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions())); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); -} - -void CLEmbeddingLookupKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - Window win_lookup; - win_lookup.set(Window::DimX, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_in); - add_1D_tensor_argument(idx, _lookups, win_lookup); - - enqueue(queue, *this, slice_in); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_1D(win_lookup)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp deleted file mode 100644 index 3bfe3e407..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp +++ 
/dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/kernels/CLGatherExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include "arm_compute/core/UtilsEx.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -namespace -{ - -inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis) -{ - const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions())); - ARM_COMPUTE_RETURN_ERROR_ON(indices->num_dimensions() > 3); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(input->num_dimensions() + indices->num_dimensions() - 1 > 4); - ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= input->num_dimensions()); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex( - input->tensor_shape(), indices->tensor_shape(), actual_axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *indices, - ITensorInfo *output, int axis) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); - const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions())); 
- std::unique_ptr<ITensorInfo> output_info = input->clone(); - output_info->set_tensor_shape(arm_compute::misc::shape_calculator::compute_gather_shape_ex( - input->tensor_shape(), indices->tensor_shape(), actual_axis)); - // Output auto initialization if not yet initialized - auto_init_if_empty((*output), output_info->tensor_shape(), 1, input->data_type()); - - // Create window - Window win = calculate_max_window(*output, Steps()); - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - return std::make_pair(Status{}, win); -} - -} // namespace - -CLGatherExKernel::CLGatherExKernel() - : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0) -{ -} - -void CLGatherExKernel::configure(const ICLTensor *input, const ICLTensor *indices, - ICLTensor *output, int axis) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), indices->info(), output->info(), axis)); - - // Configure kernel window - auto win_config = - validate_and_configure_window(input->info(), indices->info(), output->info(), axis); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - - _input = input; - _output = output; - _indices = indices; - _axis = wrap_around(axis, static_cast<int>(input->info()->num_dimensions())); - - // Set build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option("-DOUTPUT_DIM_Z=" + - support::cpp11::to_string(output->info()->dimension(2))); - build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2))); - build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis)); - build_opts.add_option("-DINDICES_DIM=" + - support::cpp11::to_string(indices->info()->num_dimensions())); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options())); - 
ICLKernel::configure_internal(win_config.second); -} - -Status CLGatherExKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, indices, output, axis)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), - indices->clone().get(), - output->clone().get(), axis) - .first); - return Status{}; -} - -void CLGatherExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ, 4); - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, window_collapsed); - add_3D_tensor_argument(idx, _indices, window_collapsed); - add_4D_tensor_argument(idx, _output, window_collapsed); - enqueue(queue, *this, window_collapsed, lws_hint()); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp deleted file mode 100644 index 930e7c944..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - input_access.set_valid_region(win, output->valid_region()); - - Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLHashtableLookupKernel::CLHashtableLookupKernel() -{ - // DO NOTHING -} - -Status CLHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(hits, 1, DataType::U8, DataType::QASYMM8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0, - "Output's shape was not set"); - - 
ARM_COMPUTE_ERROR_ON(lookups->dimension(0) != hits->dimension(0) || - output->dimension(output->num_dimensions() - 1) != lookups->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(keys->num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(hits->num_dimensions() > 1); - - return Status{}; -} - -void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTensor *keys, - const ICLTensor *input, ICLTensor *output, ICLTensor *hits) -{ - ARM_COMPUTE_ERROR_THROW_ON( - validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info())); - - _lookups = lookups; - _keys = keys; - _input = input; - _output = output; - _hits = hits; - - // Make _lookup_indices tensor - _lookup_indices = support::cpp14::make_unique<CLTensor>(); - _lookup_indices->allocator()->init( - TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32)); - _lookup_indices->allocator()->allocate(); - - // Set kernel build options - std::stringstream kernel_name; - std::set<std::string> build_opts; - kernel_name << "hashtable_lookup"; - - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions())); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); -} - -void CLHashtableLookupKernel::run(const Window &window, 
cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const_cast<ICLTensor *>(_lookups)->map(queue); - const_cast<ICLTensor *>(_keys)->map(queue); - _hits->map(queue); - _lookup_indices->map(queue); - - // Set values of hits - const int32_t *lookups_buf = - reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer()); - const int32_t *keys_buf = reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_keys)->buffer()); - uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer()); - int32_t *lookup_indices_buf = reinterpret_cast<int32_t *>(_lookup_indices->buffer()); - - std::map<int32_t, size_t> key_map; - const size_t keys_num = _keys->info()->dimension(0); - for (size_t key_index = 0; key_index < keys_num; key_index++) - { - key_map[keys_buf[key_index]] = key_index; - } - - const size_t lookups_num = _lookups->info()->dimension(0); - for (size_t i = 0; i < lookups_num; ++i) - { - const auto lookup_value = lookups_buf[i]; - const auto it = key_map.find(lookup_value); - if (it != key_map.end()) - { -#if defined(ARM_COMPUTE_DEBUG_ENABLED) - if (it->second >= lookups_num) - ARM_COMPUTE_ERROR("HashTable Lookup: index out of bounds."); -#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) - lookup_indices_buf[i] = static_cast<int32_t>(it->second); - hits_buf[i] = static_cast<uint8_t>(1); - } - else - { - lookup_indices_buf[i] = -1; - hits_buf[i] = static_cast<uint8_t>(0); - } - } - - const_cast<ICLTensor *>(_lookups)->unmap(queue); - const_cast<ICLTensor *>(_keys)->unmap(queue); - _hits->unmap(queue); - _lookup_indices->unmap(queue); - - Window win = window.collapse(ICLKernel::window(), 2, 4); - - Window win_lookup; - win_lookup.set(Window::DimX, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, win); - add_4D_tensor_argument(idx, _output, win); - add_1D_tensor_argument(idx, _lookup_indices.get(), win_lookup); - - 
enqueue(queue, *this, win); - } while (window.slide_window_slice_4D(win) && window.slide_window_slice_1D(win_lookup)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp deleted file mode 100644 index 61c14d271..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/CLValidate.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Window.h" -#include "support/StringSupport.h" -#include "support/ToolchainSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma, const ITensorInfo *beta, float epsilon) -{ - ARM_COMPUTE_UNUSED(gamma); - ARM_COMPUTE_UNUSED(beta); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(epsilon == 0.f, "Epsilon must be different than 0"); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32); - - if (output != nullptr && output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), - "Input and output have different number of channels"); - } - - return Status{}; -} - -std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - // We handle the planes manually - 
Window win = calculate_max_window(*input, Steps(1)); - - // Output auto initialization if not yet initialized - auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type()); - - // CLInstanceNormalizationLayerKernelEx doesn't need padding so update_window_and_padding() can be - // skipped - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(ValidRegion(coord, output->tensor_shape())); - return std::make_pair(Status{}, win); -} -} // namespace - -CLInstanceNormalizationLayerKernelEx::CLInstanceNormalizationLayerKernelEx() - : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12), - _run_in_place(false) -{ -} - -void CLInstanceNormalizationLayerKernelEx::configure(ICLTensor *input, ICLTensor *output, - ICLTensor *gamma, ICLTensor *beta, - float epsilon) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _input = input; - _output = output == nullptr ? input : output; - _gamma = gamma; - _beta = beta; - _epsilon = epsilon; - - _run_in_place = (output == nullptr) || (output == input); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(_input->info(), _output->info(), - gamma ? gamma->info() : nullptr, - beta ? 
beta->info() : nullptr, epsilon)); - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option("-DVEC_SIZE=" + - support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DDIM_X=" + support::cpp11::to_string(input->info()->dimension(0))); - build_opts.add_option("-DDIM_Y=" + support::cpp11::to_string(input->info()->dimension(1))); - build_opts.add_option("-DDIM_Z=" + support::cpp11::to_string(input->info()->dimension(2))); - build_opts.add_option("-DEPSILON=" + float_to_string_with_full_precision(epsilon)); - build_opts.add_option_if(gamma, "-DGAMMA"); - build_opts.add_option_if(beta, "-DBETA"); - build_opts.add_option_if(_run_in_place, "-DIN_PLACE"); - build_opts.add_option_if(_input->info()->data_layout() == DataLayout::NHWC, "-DNHWC"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options())); - - // Configure kernel window - auto win_config = validate_and_configure_window(_input->info(), _output->info()); - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure_internal(std::get<1>(win_config)); -} - -Status CLInstanceNormalizationLayerKernelEx::validate(const ITensorInfo *input, - const ITensorInfo *output, - const ITensorInfo *gamma, - const ITensorInfo *beta, float epsilon) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon)); - ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window( - input->clone().get(), (output == nullptr ? 
input->clone().get() : output->clone().get())))); - return Status{}; -} - -void CLInstanceNormalizationLayerKernelEx::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window collapsed_window = window.collapse(window, Window::DimZ); - - // We will process the planes together - if (_input->info()->data_layout() == DataLayout::NCHW) - { - collapsed_window.set(Window::DimX, Window::Dimension(0, 1, 1)); - collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1)); - } - else - { - collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1)); - collapsed_window.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(3), 1)); - } - - Window vec_window; - vec_window.set(Window::DimX, Window::Dimension(0, 0, 0)); - - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, collapsed_window); - if (!_run_in_place) - { - add_4D_tensor_argument(idx, _output, collapsed_window); - } - if (_gamma) - { - add_1D_tensor_argument(idx, _gamma, vec_window); - } - if (_beta) - { - add_1D_tensor_argument(idx, _beta, vec_window); - } - - enqueue(queue, *this, collapsed_window, lws_hint()); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp deleted file mode 100644 index 6b27c9917..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/CLValidate.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor, - const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1)); - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - - // Checks performed when output is configured - if ((output->total_size() != 0)) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::tuple<Status, Window> validate_and_configure_window(const ITensorInfo *input, - ITensorInfo *output) -{ - // Configure kernel window - Window win = calculate_max_window(*input, Steps()); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32); - - // CLMultiplyScaleFactorKernel doesn't need 
padding so update_window_and_padding() can be - // skipped - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(ValidRegion(coord, output->tensor_shape())); - - return std::make_tuple(Status{}, win); -} -} // namespace - -CLMultiplyScaleFactorKernel::CLMultiplyScaleFactorKernel() - : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f) -{ -} - -void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTensor *scale_factor, - ICLTensor *output, float multiplier) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), scale_factor->info(), output->info())); - - _input = input; - _scale_factor = scale_factor; - _output = output; - _multiplier = multiplier; - - const int vec_size_x = 16 / output->info()->element_size(); - const int output_width_x = output->info()->tensor_shape().x(); - const bool multi_access_x = (output_width_x / vec_size_x > 0); - - // Create and update the window (if needed) - Window win = calculate_max_window(*output->info()); - if (multi_access_x) - { - win.set(Window::DimX, - Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), - vec_size_x)); - } - ICLKernel::configure_internal(win); - - // Create kernel - CLBuildOptions build_opts; - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); - build_opts.add_option_if( - multi_access_x, "-DLAST_ACCESSED_X=" + - support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0))); - - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options())); -} - -Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input, - const ITensorInfo *scale_factor, - const ITensorInfo *output) -{ - 
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output)); - ARM_COMPUTE_RETURN_ON_ERROR( - std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); - return Status{}; -} - -void CLMultiplyScaleFactorKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = window_collapsed.first_slice_window_2D(); - - // Set scale_factor window - Window win_scale = calculate_max_window(*_scale_factor->info(), Steps()); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_1D_tensor_argument(idx, _scale_factor, win_scale); - add_2D_tensor_argument(idx, _output, slice); - _kernel.setArg<float>(idx++, _multiplier); - enqueue(queue, *this, slice, lws_hint()); - } while (window_collapsed.slide_window_slice_2D(slice)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp deleted file mode 100644 index 643c8b110..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/kernels/CLNegKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16, DataType::S32, - DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16, DataType::S32, - DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - return Status{}; -} - -} // namespace - -CLNegKernel::CLNegKernel() : _input(nullptr), _output(nullptr) {} - -void CLNegKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Create kernel - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts)); - - // Configure window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - 
- ICLKernel::configure_internal(win); -} - -void CLNegKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } while (collapsed.slide_window_slice_3D(slice)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp deleted file mode 100644 index 35d70d689..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/CL/kernels/CLOneHotKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include "support/StringSupport.h" -#include <string> -namespace arm_compute -{ -namespace -{ -inline Status validate_arguments(const ITensorInfo *indices, const ITensorInfo *on_value, - const ITensorInfo *output, int depth, int axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(indices, on_value, output); - const uint32_t actual_axis = wrap_around(axis, static_cast<int>(output->num_dimensions())); - ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4); - ARM_COMPUTE_RETURN_ERROR_ON(on_value->tensor_shape().total_size() != 1); - ARM_COMPUTE_RETURN_ERROR_ON(depth <= 0); - ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= output->num_dimensions()); - ARM_COMPUTE_RETURN_ERROR_ON(on_value->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(on_value, 1, DataType::U8, DataType::S8, - DataType::U16, DataType::S16, DataType::F16, - DataType::U32, DataType::S32, DataType::F32); - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, output); - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex( - indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *indices, - const ITensorInfo *on_value, - ITensorInfo *output, int depth, int axis) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(indices, on_value, output, indices); - const uint32_t actual_axis = wrap_around(axis, static_cast<int>(output->num_dimensions())); - // Output auto initialization if not yet initialized - TensorShape output_shape = 
arm_compute::misc::shape_calculator::compute_onehot_shape_ex( - indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis); - auto_init_if_empty((*output), output_shape, 1, on_value->data_type()); - // Create window - Window win = calculate_max_window(*output, Steps()); - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - return std::make_pair(Status{}, win); -} -} // namespace -CLOneHotKernel::CLOneHotKernel() - : _indices(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr), - _is_off_value_memset(false) -{ -} -void CLOneHotKernel::configure(const ICLTensor *indices, const ICLTensor *on_value, - const ICLTensor *off_value, ICLTensor *output, int depth, int axis) -{ - _is_off_value_memset = false; - ARM_COMPUTE_ERROR_ON_NULLPTR(indices, on_value, off_value, output); - ARM_COMPUTE_ERROR_ON_NULLPTR(off_value->info()); - ARM_COMPUTE_ERROR_ON(off_value->info()->tensor_shape().total_size() != 1); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, off_value); - _off_value = off_value; - configure_common(indices, on_value, output, depth, axis); -} -void CLOneHotKernel::configure(const ICLTensor *indices, const ICLTensor *on_value, - ICLTensor *output, int depth, int axis) -{ - _is_off_value_memset = true; - ARM_COMPUTE_ERROR_ON_NULLPTR(indices, on_value, output); - configure_common(indices, on_value, output, depth, axis); -} -void CLOneHotKernel::configure_common(const ICLTensor *indices, const ICLTensor *on_value, - ICLTensor *output, int depth, int axis) -{ - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(indices->info(), on_value->info(), output->info(), depth, axis)); - // Configure kernel window - auto win_config = - validate_and_configure_window(indices->info(), on_value->info(), output->info(), depth, axis); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - if (_is_off_value_memset) - { - // Replace window with calculated by infices info - win_config.second = calculate_max_window(*indices->info(), 
Steps()); - } - _indices = indices; - _on_value = on_value; - _output = output; - const auto actual_axis = wrap_around(axis, static_cast<int>(output->info()->num_dimensions())); - // Set build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size( - data_size_from_type(on_value->info()->data_type()))); - build_opts.add_option("-DAXIS=" + support::cpp11::to_string(actual_axis)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(depth)); - build_opts.add_option("-DOUTPUT_DIM_Z=" + - support::cpp11::to_string(output->info()->dimension(2))); - // Create kernel - const std::string kernel_name = _is_off_value_memset ? "one_hot_only_on_value" : "one_hot"; - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options())); - ICLKernel::configure_internal(win_config.second); -} -Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value, - const ITensorInfo *off_value, const ITensorInfo *output, int depth, - int axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(off_value); - ARM_COMPUTE_RETURN_ERROR_ON(off_value->tensor_shape().total_size() != 1); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, off_value); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(indices, on_value, output, depth, axis)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(), - on_value->clone().get(), - output->clone().get(), depth, axis) - .first); - return Status{}; -} -Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value, - const ITensorInfo *output, int depth, int axis) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(indices, on_value, output, depth, axis)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(), - on_value->clone().get(), - output->clone().get(), depth, axis) - .first); - return Status{}; -} -void CLOneHotKernel::run(const 
Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - unsigned int idx = 0; - add_3D_tensor_argument(idx, _indices, window_collapsed); - add_1D_tensor_argument(idx, _on_value, window_collapsed); - if (!_is_off_value_memset) - { - add_1D_tensor_argument(idx, _off_value, window_collapsed); - } - add_4D_tensor_argument(idx, _output, window_collapsed); - enqueue(queue, *this, window_collapsed, lws_hint()); -} - -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp deleted file mode 100644 index 1a7a18cfa..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/CLValidate.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor, - const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, scale_factor); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1)); - - // Output must always be initialized - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - // Configure kernel window - Window win = calculate_max_window(*input, Steps()); - - const int vec_size_x = 16 / input->element_size(); - const int input_width_x = input->tensor_shape().x(); - const bool multi_access_x = (input_width_x / vec_size_x > 0); - - if (multi_access_x) - { - win.set(Window::DimX, - Window::Dimension(win.x().start(), 
ceil_to_multiple(win.x().end(), vec_size_x), - vec_size_x)); - } - - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(ValidRegion(coord, output->tensor_shape())); - - return std::make_pair(Status{}, win); -} -} // namespace - -CLQuantizationSymmetricKernel::CLQuantizationSymmetricKernel() - : _input(nullptr), _scale_factor(nullptr), _output(nullptr) -{ -} - -void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLTensor *scale_factor, - ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output); - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), scale_factor->info(), output->info())); - - _input = input; - _scale_factor = scale_factor; - _output = output; - - const int vec_size_x = 16 / input->info()->element_size(); - const int input_width_x = input->info()->tensor_shape().x(); - const bool multi_access_x = (input_width_x / vec_size_x > 0); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); - - // Create kernel - CLBuildOptions build_opts; - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); - build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option("-DDATA_TYPE_OUT=" + - get_cl_type_from_data_type(output->info()->data_type())); - build_opts.add_option_if( - multi_access_x, "-DLAST_ACCESSED_X=" + - support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0))); - - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options())); -} - -Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input, - const ITensorInfo *scale_factor, - const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, 
scale_factor, output)); - ARM_COMPUTE_RETURN_ON_ERROR( - validate_and_configure_window(input->clone().get(), output->clone().get()).first); - - return Status{}; -} - -void CLQuantizationSymmetricKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - // Support only 2D - Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = window_collapsed.first_slice_window_2D(); - - do - { - Window scale_slice = slice.shift_dimensions(1); - - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_1D_tensor_argument(idx, _scale_factor, scale_slice); - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } while (window_collapsed.slide_window_slice_2D(slice)); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp deleted file mode 100644 index 06c2579f2..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "support/StringSupport.h" - -using namespace arm_compute; -namespace -{ -// NOTE This is necessary because it is not guaranteed that the axis positions of input and output -// are the same. 
-const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t axis) -{ - TensorShape out_shape{input_shape}; - - out_shape.set(axis, 1); - - return out_shape; -} -} // namespace - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis, - ReduceOperation op) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32, DataType::S32); - if (op == ReduceOperation::SUM) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8, - "Not support QASYMM8, yet"); - } - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0, - "Inputs are not broadcast compatible"); - - const auto num_dimensions = input->tensor_shape().num_dimensions(); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= num_dimensions, "axis must be less than (input's rank)."); - - const TensorShape output_shape = inferOutputShape(input->tensor_shape(), axis); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(), - "output shape's size does not match axis"); - - return Status{}; -} -} // namespace - -CLReduceOperationKernel::CLReduceOperationKernel() : _input(nullptr), _output(nullptr), _axis() {} - -void CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *output, - const uint32_t axis, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op)); - - _input = input; - _output = output; - _axis = axis; - - std::unique_ptr<ITensorInfo> output_info = output->info()->clone(); - output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), 
axis)); - - // Construct kernel name - std::string kernel_name; - int op_code = 0; - if (op == ReduceOperation::MAX) - { - kernel_name = "reduce_min_max"; - op_code = 1; - } - else if (op == ReduceOperation::MIN) - { - kernel_name = "reduce_min_max"; - op_code = 2; - } - else if (op == ReduceOperation::SUM) - { - kernel_name = "reduce_sum_mean"; - op_code = 3; - } - else if (op == ReduceOperation::MEAN) - { - kernel_name = "reduce_sum_mean"; - op_code = 4; - } - else - throw std::runtime_error("Operation not supported, yet"); - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(output_info->data_type())); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2))); - build_opts.emplace("-DOP_CODE=" + support::cpp11::to_string(op_code)); - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output_info, Steps()); - - Coordinates coord; - coord.set_num_dimensions(output_info->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -Status CLReduceOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const uint32_t axis, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op)); - - return Status{}; -} - -void CLReduceOperationKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &shape_in = _input->info()->tensor_shape(); - - unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters - - _kernel.setArg<cl_int>(idx++, _axis); - 
_kernel.setArg<cl_int>(idx++, shape_in[_axis]); - - // Support dimensions up to 4 - Window slice_out = window.collapse(ICLKernel::window(), 2, 4); - - // Setup input slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - // Copy output's shape in order to use for recovering at end of this method - // TODO Remove changing and recovering output's shape if it is guaranteed that the axis positions - // of input and output are the same - const TensorShape shape_out = _output->info()->tensor_shape(); - _output->info()->set_tensor_shape(inferOutputShape(shape_in, _axis)); - - idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out, lws_hint()); - - // Recover output's shape of output tensor - _output->info()->set_tensor_shape(shape_out); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp deleted file mode 100644 index 8d8853c81..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "support/StringSupport.h" - -#include <climits> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2); - - if (output->tensor_shape().total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - TensorShape output_shape = TensorShape{input->dimension(1)}; - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); - } - - return Status{}; -} - -std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - TensorShape output_shape = TensorShape{input->dimension(1)}; - - // Output auto initialization if not yet initialized - auto_init_if_empty(*output, output_shape, 1, input->data_type()); - - const unsigned int num_elems_processed_per_iteration = 1; - - // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowStatic output_access(output, 0, 0, output->dimension(0), 1); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); 
- - Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_tuple(err, win); -} -} // namespace - -CLScaleFactorSymm8Kernel::CLScaleFactorSymm8Kernel() : _input(nullptr), _output(nullptr) {} - -void CLScaleFactorSymm8Kernel::configure(const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; - - std::set<std::string> build_opts; - build_opts.emplace("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts)); - - auto win_config = validate_and_configure_window(input->info(), output->info()); - - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - - ICLKernel::configure_internal(std::get<1>(win_config)); -} - -Status CLScaleFactorSymm8Kernel::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - ARM_COMPUTE_RETURN_ON_ERROR( - std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); - - return Status{}; -} - -void CLScaleFactorSymm8Kernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = window_collapsed.first_slice_window_2D(); - slice.set(Window::DimX, Window::Dimension(0, 1, 1)); - - do - { - Window output_slice = slice.shift_dimensions(1); - - unsigned int idx = 0; - // Set inputs - add_2D_tensor_argument(idx, _input, slice); - add_1D_tensor_argument(idx, _output, output_slice); - enqueue(queue, *this, slice, lws_hint()); - } while 
(window_collapsed.slide_window_slice_2D(slice)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp deleted file mode 100644 index 151d45e8d..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp +++ /dev/null @@ -1,497 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -// Disable GPU implementation -// TODO Enable GPU implementation with verification, or remove code -// Invalid result on GPU -#if 0 -namespace arm_compute -{ -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2Single::CLTopKV2Single() : _input(nullptr), _topk_values(nullptr), _topk_indices(nullptr) {} - -void CLTopKV2Single::configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices, - cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr && indices == nullptr); - ARM_COMPUTE_ERROR_ON(topk_values == nullptr && topk_indices == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - _input = input; - _topk_values = topk_values; - _topk_indices = topk_indices; - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("topkv2_quicksort", build_opts)); - - unsigned int idx = 3 * num_arguments_per_1D_tensor(); - _kernel.setArg(idx++, *indices); - _kernel.setArg(idx++, *temp_stack); - _kernel.setArg<cl_int>(idx++, k); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, 1, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2Single::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; 
- add_1D_tensor_argument(idx, _input, window); - add_1D_tensor_argument(idx, _topk_values, window); - add_1D_tensor_argument(idx, _topk_indices, window); - - enqueue(queue, *this, window); -} - -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2Init::CLTopKV2Init() : _input(nullptr) {} - -void CLTopKV2Init::configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, - int n) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr && in_key_buf == nullptr); - ARM_COMPUTE_ERROR_ON(in_ind_buf == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - _input = input; - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("topkv2_init", build_opts)); - - unsigned int idx = num_arguments_per_1D_tensor(); - _kernel.setArg(idx++, *in_key_buf); - _kernel.setArg(idx++, *in_ind_buf); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, n, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2Init::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - add_1D_tensor_argument(idx, _input, window); - - enqueue(queue, *this, window); -} - -//////////////////////////////////////////////////////////////////////////////// -// This kernel makes a histogram of radix for each work item. 
-CLRadixSortHistogram::CLRadixSortHistogram() : _pass(0), _in_key_buf(nullptr) {} - -void CLRadixSortHistogram::configure(cl::Buffer *hist_buf, int bits, int n) -{ - ARM_COMPUTE_ERROR_ON(hist_buf == nullptr); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("radixsort_histogram", build_opts)); - - int loc_histo_size = radix * _ITEMS * sizeof(cl_int); - - unsigned int idx = 1; - _kernel.setArg(idx++, *hist_buf); - - idx = 3; - _kernel.setArg(idx++, loc_histo_size, nullptr); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, _GROUPS * _ITEMS, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortHistogram::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _kernel.setArg(0, *_in_key_buf); - _kernel.setArg<cl_int>(2, _pass); - - cl::NDRange lws = cl::NDRange(_ITEMS, 1); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLRadixSortScanHistogram::CLRadixSortScanHistogram() {} - -void CLRadixSortScanHistogram::configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits) -{ - ARM_COMPUTE_ERROR_ON(hist_buf == nullptr && glob_sum_buf == nullptr); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( 
- CLKernelLibraryEx::get().create_kernel("radixsort_scanhistograms", build_opts)); - - int temp_size = - std::max<uint32_t>(_HISTOSPLIT, _ITEMS * _GROUPS * radix / _HISTOSPLIT) * sizeof(cl_uint); - - unsigned int idx = 0; - _kernel.setArg(idx++, *hist_buf); - _kernel.setArg(idx++, temp_size, nullptr); - _kernel.setArg(idx++, *glob_sum_buf); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, radix * _GROUPS * _ITEMS / 2, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortScanHistogram::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - cl::NDRange lws = cl::NDRange(gws_x / _HISTOSPLIT, 1); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLRadixSortGlobalScanHistogram::CLRadixSortGlobalScanHistogram() {} - -void CLRadixSortGlobalScanHistogram::configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, - int bits) -{ - ARM_COMPUTE_ERROR_ON(glob_sum_buf == nullptr && temp_buf == nullptr); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("radixsort_scanhistograms", build_opts)); - - int temp_size = - std::max<uint32_t>(_HISTOSPLIT, _ITEMS * _GROUPS * radix / _HISTOSPLIT) * sizeof(cl_uint); - - unsigned int idx = 0; - _kernel.setArg(idx++, *glob_sum_buf); - _kernel.setArg(idx++, temp_size, nullptr); - _kernel.setArg(idx++, *temp_buf); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, 
_HISTOSPLIT / 2, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortGlobalScanHistogram::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - cl::NDRange lws = cl::NDRange(gws_x, 1); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLRadixSortPasteHistogram::CLRadixSortPasteHistogram() {} - -void CLRadixSortPasteHistogram::configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits) -{ - ARM_COMPUTE_ERROR_ON(hist_buf == nullptr && glob_sum_buf == nullptr); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("radixsort_pastehistograms", build_opts)); - - unsigned int idx = 0; - _kernel.setArg(idx++, *hist_buf); - _kernel.setArg(idx++, *glob_sum_buf); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, radix * _GROUPS * _ITEMS / 2, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortPasteHistogram::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - cl::NDRange lws = cl::NDRange(gws_x / _HISTOSPLIT, 1); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLRadixSortReorder::CLRadixSortReorder() - : _pass(0), 
_in_key_buf(nullptr), _out_key_buf(nullptr), _in_ind_buf(nullptr), - _out_ind_buf(nullptr) -{ -} - -void CLRadixSortReorder::configure(cl::Buffer *hist_buf, int bits, int n) -{ - ARM_COMPUTE_ERROR_ON(hist_buf == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("radixsort_reorder", build_opts)); - - unsigned int idx = 2; - _kernel.setArg(idx++, *hist_buf); - - idx = 6; - _kernel.setArg(idx++, sizeof(uint) * radix * _ITEMS, nullptr); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, _GROUPS * _ITEMS, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortReorder::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - unsigned int lx = std::max(1U, (gws_x / _HISTOSPLIT)); - cl::NDRange lws = (lx < gws_x) ? 
cl::NDRange(lx, 1) : cl::NDRange(1, 1); - - _kernel.setArg(0, *_in_key_buf); - _kernel.setArg(1, *_out_key_buf); - _kernel.setArg<cl_int>(3, _pass); - _kernel.setArg(4, *_in_ind_buf); - _kernel.setArg(5, *_out_ind_buf); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2FindFirstNegative::CLTopKV2FindFirstNegative() : _out_key_buf(nullptr) {} - -void CLTopKV2FindFirstNegative::configure(cl::Buffer *first_negative_idx_buf, int n) -{ - ARM_COMPUTE_ERROR_ON(first_negative_idx_buf == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("topkv2_find_first_negative", build_opts)); - - unsigned int idx = 1; - _kernel.setArg(idx++, *first_negative_idx_buf); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, n, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2FindFirstNegative::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - _kernel.setArg(idx++, *_out_key_buf); - - enqueue(queue, *this, window); -} - -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2ReorderNegatives::CLTopKV2ReorderNegatives() - : _in_key_buf(nullptr), _out_key_buf(nullptr), _in_ind_buf(nullptr), _out_ind_buf(nullptr) -{ -} - -void CLTopKV2ReorderNegatives::configure(cl::Buffer *first_negative_idx_buf, int n) -{ - ARM_COMPUTE_ERROR_ON(first_negative_idx_buf == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("topkv2_reorder_negatives", 
build_opts)); - - unsigned int idx = 4; - _kernel.setArg(idx++, *first_negative_idx_buf); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, n, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2ReorderNegatives::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - _kernel.setArg(idx++, *_in_key_buf); - _kernel.setArg(idx++, *_out_key_buf); - _kernel.setArg(idx++, *_in_ind_buf); - _kernel.setArg(idx++, *_out_ind_buf); - - enqueue(queue, *this, window); -} - -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2Store::CLTopKV2Store() - : _values(nullptr), _indices(nullptr), _out_key_buf(nullptr), _out_ind_buf(nullptr) -{ -} - -void CLTopKV2Store::configure(ICLTensor *values, ICLTensor *indices, int k, int n) -{ - ARM_COMPUTE_ERROR_ON(values == nullptr && indices == nullptr); - ARM_COMPUTE_ERROR_ON(k == 0); - ARM_COMPUTE_ERROR_ON(k > n); - - _values = values; - _indices = indices; - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("topkv2_store", build_opts)); - - unsigned int idx = 2 * num_arguments_per_1D_tensor() + 2; - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, k, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2Store::setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf) -{ - _out_key_buf = out_key_buf; - _out_ind_buf = out_ind_buf; -} - -void CLTopKV2Store::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - add_1D_tensor_argument(idx, _values, window); 
- add_1D_tensor_argument(idx, _indices, window); - _kernel.setArg(idx++, *_out_key_buf); - _kernel.setArg(idx++, *_out_ind_buf); - - enqueue(queue, *this, window); -} - -} // namespace arm_compute -#endif // Disable GPU implementation diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp deleted file mode 100644 index dfe5d59b0..000000000 --- a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/NEElementwiseOperationFuncs.h" - -#include <algorithm> -#include "arm_compute/core/Types.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Window.h" - -namespace -{ - -using namespace arm_compute; -template <typename InputScalarType, typename OutputScalarType, typename InputVectorType> -void elementwise_op_templ( - const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &), - int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &, - OutputScalarType *, const bool), - int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *, - OutputScalarType *)) -{ - // Create input windows - Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()); - Window input2_win = window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()); - - // Clear X Dimension on execution window as we handle manually - Window win = window; - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - const int window_step_x = std::min(16 / static_cast<int>(sizeof(OutputScalarType)), 8); - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || 
(input2_win.x().step() == 0); - - if (is_broadcast_across_x) - { - const bool is_broadcast_input_2 = input2_win.x().step() == 0; - Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; - Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; - const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; - const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; - - // Clear X Dimension on execution window as we handle manually - non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator broadcast_input(broadcast_tensor, broadcast_win); - Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win); - Iterator output(out, win); - - execute_window_loop(win, - [&](const Coordinates &) { - auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr()); - const auto non_broadcast_input_ptr = - reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr()); - const InputScalarType broadcast_value = - *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr()); - - int x = (*broadcast_func)(window_start_x, window_end_x, window_step_x, - non_broadcast_input_ptr, broadcast_value, - output_ptr, !is_broadcast_input_2); - for (; x < window_end_x; ++x) - { - const auto a = *(non_broadcast_input_ptr + x); - *(output_ptr + x) = - (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a, - !is_broadcast_input_2 ? 
a : broadcast_value); - } - }, - broadcast_input, non_broadcast_input, output); - } - else - { - // Clear X Dimension on execution window as we handle manually - input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input1(in1, input1_win); - Iterator input2(in2, input2_win); - Iterator output(out, win); - - execute_window_loop(win, - [&](const Coordinates &) { - auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr()); - const auto input1_ptr = - reinterpret_cast<const InputScalarType *>(input1.ptr()); - const auto input2_ptr = - reinterpret_cast<const InputScalarType *>(input2.ptr()); - - int x = (*neon_func)(window_start_x, window_end_x, window_step_x, - input1_ptr, input2_ptr, output_ptr); - for (; x < window_end_x; ++x) - { - const auto a = *(input1_ptr + x); - const auto b = *(input2_ptr + x); - *(output_ptr + x) = (*scalar_func)(a, b); - } - }, - input1, input2, output); - } -} - -} // namespace - -namespace arm_compute -{ - -void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - float (*scalar_func)(const float &, const float &), - int (*broadcast_func)(int, int, int, const float *, const float &, float *, - const bool), - int (*neon_func)(int, int, int, const float *, const float *, float *)) -{ - elementwise_op_templ<float, float, float32x4_t>(in1, in2, out, window, scalar_func, - broadcast_func, neon_func); -} - -void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - uint8_t (*scalar_func)(const uint8_t &, const uint8_t &), - int (*broadcast_func)(int, int, int, const uint8_t *, const uint8_t &, - uint8_t *, const bool), - int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *)) -{ - elementwise_op_templ<uint8_t, uint8_t, uint8x16_t>(in1, in2, out, window, scalar_func, - broadcast_func, neon_func); -} -} // namespace arm_compute diff --git 
a/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp deleted file mode 100644 index 648705ba9..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/NEON/NEMath.h" -#include "arm_compute/core/NEON/NESymm.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include <arm_neon.h> -#include <array> -#include <cmath> -#include <map> -#include <set> - -using namespace arm_compute; -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfo &activation_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); - - static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations = { - ActivationLayerInfo::ActivationFunction::RELU, - ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, - ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, - ActivationLayerInfo::ActivationFunction::LOGISTIC, - ActivationLayerInfo::ActivationFunction::TANH}; - static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations = { - ActivationLayerInfo::ActivationFunction::LOGISTIC, - ActivationLayerInfo::ActivationFunction::TANH}; - const DataType data_type = input->data_type(); - const QuantizationInfo &oq_info = - (output != 
nullptr) ? output->quantization_info() : input->quantization_info(); - const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation(); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - is_data_type_quantized_asymmetric(data_type) && - (qasymm8_supported_activations.count(f_act) == 0), - "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && - (qsymm16_supported_activations.count(f_act) == 0), - "For QSYMM16 only tanh and logistic are supported"); - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && - (f_act == ActivationLayerInfo::ActivationFunction::TANH) && - (oq_info != QuantizationInfo(1.f / 128.f, 128))); - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && - (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && - (oq_info != QuantizationInfo(1.f / 256.f, 0))); - - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && - (f_act == ActivationLayerInfo::ActivationFunction::TANH) && - (oq_info != QuantizationInfo(1.f / 32768.f, 0))); - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && - (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && - (oq_info != QuantizationInfo(1.f / 32768.f, 0))); - - // Checks performed when output is configured - if ((output != nullptr) && (output->total_size() != 0)) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - // Configure kernel window - Window win = calculate_max_window(*input, Steps()); - - if (output != nullptr) - { - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, *input->clone()); - - // NEActivationLayerKernelEx doesn't need padding so 
update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(ValidRegion(coord, output->tensor_shape())); - } - - return std::make_pair(Status{}, win); -} - -inline uint32x4_t vreinterpret_unsigend_int(const float32x4_t &vec) -{ - return vreinterpretq_u32_f32(vec); -} - -inline float32x4_t vreinterpret_floating_point(const uint32x4_t &vec) -{ - return vreinterpretq_f32_u32(vec); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -inline uint16x8_t vreinterpret_unsigend_int(const float16x8_t &vec) -{ - return vreinterpretq_u16_f16(vec); -} -inline float16x8_t vreinterpret_floating_point(const uint16x8_t &vec) -{ - return vreinterpretq_f16_u16(vec); -} -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ -} // namespace - -NEActivationLayerKernelEx::NEActivationLayerKernelEx() - : _input(nullptr), _output(nullptr), _func(nullptr), _act_info() -{ -} - -void NEActivationLayerKernelEx::configure(ITensor *input, ITensor *output, - ActivationLayerInfo activation_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _input = input; - _act_info = activation_info; - _output = input; - - // Out-of-place calculation - if (output != nullptr) - { - _output = output; - } - - // Disabled activation, thus no operation needed - if (!activation_info.enabled()) - { - _func = nullptr; - } - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments( - input->info(), (output != nullptr) ? 
output->info() : nullptr, activation_info)); - - // Activation functions : FP32 - static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 = { - {ActivationFunction::ABS, - &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float>}, - {ActivationFunction::LINEAR, - &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float>}, - {ActivationFunction::LOGISTIC, - &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float>}, - {ActivationFunction::RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float>}, - {ActivationFunction::BOUNDED_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float>}, - {ActivationFunction::LU_BOUNDED_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float>}, - {ActivationFunction::LEAKY_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float>}, - {ActivationFunction::SOFT_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float>}, - {ActivationFunction::ELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float>}, - {ActivationFunction::SQRT, - &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float>}, - {ActivationFunction::SQUARE, - &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float>}, - {ActivationFunction::TANH, - &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float>}, - {ActivationFunction::IDENTITY, - &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float>}, - }; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - // Activation functions : FP16 - static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 = { - {ActivationFunction::ABS, - &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float16_t>}, - {ActivationFunction::LINEAR, - &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float16_t>}, 
- {ActivationFunction::LOGISTIC, - &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float16_t>}, - {ActivationFunction::RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float16_t>}, - {ActivationFunction::BOUNDED_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float16_t>}, - {ActivationFunction::LU_BOUNDED_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t>}, - {ActivationFunction::LEAKY_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float16_t>}, - {ActivationFunction::SOFT_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float16_t>}, - {ActivationFunction::ELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float16_t>}, - {ActivationFunction::SQRT, - &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float16_t>}, - {ActivationFunction::SQUARE, - &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float16_t>}, - {ActivationFunction::TANH, - &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float16_t>}, - {ActivationFunction::IDENTITY, - &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float16_t>}, - }; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ - - // Activation functions : QASYMM8 - static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 = { - {ActivationFunction::LOGISTIC, - &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qasymm8_t>}, - {ActivationFunction::BOUNDED_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t>}, - {ActivationFunction::LU_BOUNDED_RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t>}, - {ActivationFunction::RELU, - &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, qasymm8_t>}, - {ActivationFunction::TANH, - 
&NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qasymm8_t>}, - {ActivationFunction::IDENTITY, - &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, qasymm8_t>}, - }; - - // Activation functions : QSYMM16 - static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 = { - {ActivationFunction::LOGISTIC, - &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qsymm16_t>}, - {ActivationFunction::TANH, - &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qsymm16_t>}, - }; - - switch (input->info()->data_type()) - { - case DataType::QASYMM8: - _func = act_map_qasymm8[activation_info.activation()]; - break; - case DataType::QSYMM16: - _func = act_map_qsymm16[activation_info.activation()]; - break; - case DataType::F32: - _func = act_map_f32[activation_info.activation()]; - break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - _func = act_map_f16[activation_info.activation()]; - break; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } - - // Configure kernel window - auto win_config = - validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICPPKernel::configure(win_config.second); -} - -template <ActivationLayerInfo::ActivationFunction F, typename T> -typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type -NEActivationLayerKernelEx::activation(const Window &window) -{ - /** NEON vector tag type. 
*/ - using ExactTagType = - typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>; - - const int window_step_x = 16 / sizeof(T); - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - const ActivationFunction act = F; - - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); - - const auto infinity = wrapper::vdup_n(std::numeric_limits<T>::infinity(), ExactTagType{}); - const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{}); - const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{}); - const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}); - const auto va = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{}); - const auto vb = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{}); - const auto a = static_cast<T>(_act_info.a()); - const auto b = static_cast<T>(_act_info.b()); - - execute_window_loop( - win_collapsed, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const T *>(input.ptr()); - const auto output_ptr = reinterpret_cast<T *>(output.ptr()); - - wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp; - - // Compute S elements per iteration - int x = window_start_x; - - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vin = wrapper::vloadq(input_ptr + x); - switch (act) - { - case ActivationFunction::ABS: - tmp = wrapper::vabs(vin); - break; - case ActivationFunction::LINEAR: - tmp = wrapper::vmla(vb, va, vin); - break; - case ActivationFunction::LOGISTIC: - // exp(-vin) - tmp = wrapper::vexpq(wrapper::vneg(vin)); - - // NaN -> INF - tmp = vreinterpret_floating_point(wrapper::vorr( - wrapper::vand(wrapper::vnot(wrapper::vceq(tmp, tmp)), 
- vreinterpret_unsigend_int(infinity)), - wrapper::vand(wrapper::vceq(tmp, tmp), vreinterpret_unsigend_int(tmp)))); - - // 1 / 1 + tmp - tmp = wrapper::vinv(wrapper::vadd(const_1, tmp)); - break; - case ActivationFunction::RELU: - tmp = wrapper::vmax(const_0, vin); - break; - case ActivationFunction::BOUNDED_RELU: - tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin)); - break; - case ActivationFunction::LU_BOUNDED_RELU: - tmp = wrapper::vmin(va, wrapper::vmax(vb, vin)); - break; - case ActivationFunction::LEAKY_RELU: - tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin)); - break; - case ActivationFunction::SOFT_RELU: - tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin))); - break; - case ActivationFunction::ELU: - tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin, - wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1))); - break; - case ActivationFunction::SQRT: - tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon)); - break; - case ActivationFunction::SQUARE: - tmp = wrapper::vmul(vin, vin); - break; - case ActivationFunction::TANH: - tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin))); - break; - case ActivationFunction::IDENTITY: - tmp = vin; - break; - default: - ARM_COMPUTE_ERROR("Unsupported activation function"); - } - wrapper::vstore(output_ptr + x, tmp); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - const T in = *(reinterpret_cast<const T *>(input_ptr + x)); - T tmp; - switch (act) - { - case ActivationFunction::ABS: - tmp = std::abs(in); - break; - case ActivationFunction::LINEAR: - tmp = a * in + b; - break; - case ActivationFunction::LOGISTIC: - tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in)); - break; - case ActivationFunction::RELU: - tmp = std::max<T>(static_cast<T>(0), in); - break; - case ActivationFunction::BOUNDED_RELU: - tmp = std::min<T>(a, std::max(static_cast<T>(0), in)); - break; - case ActivationFunction::LU_BOUNDED_RELU: - tmp = 
std::min<T>(a, std::max<T>(b, in)); - break; - case ActivationFunction::LEAKY_RELU: - tmp = (in > 0) ? in : a * in; - break; - case ActivationFunction::SOFT_RELU: - tmp = std::log(static_cast<T>(1) + std::exp(in)); - break; - case ActivationFunction::ELU: - tmp = (in >= 0) ? in : a * (std::exp(in) - 1); - break; - case ActivationFunction::SQRT: - tmp = std::sqrt(in); - break; - case ActivationFunction::SQUARE: - tmp = in * in; - break; - case ActivationFunction::TANH: - tmp = a * std::tanh(b * in); - break; - case ActivationFunction::IDENTITY: - tmp = in; - break; - default: - ARM_COMPUTE_ERROR("Unsupported activation function"); - } - *(output_ptr + x) = tmp; - } - }, - input, output); -} - -template <ActivationLayerInfo::ActivationFunction F, typename T> -typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type -NEActivationLayerKernelEx::activation(const Window &window) -{ - const int window_step_x = 16 / sizeof(T); - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - const ActivationFunction act = F; - - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); - - const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); - const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); - const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); - const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); - const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in); - const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in); - const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); - const auto vconst_1 = vdupq_n_f32(1.f); - const float32x4_t va_f32 = 
vdupq_n_f32(_act_info.a()); - const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); - const float a_f32 = _act_info.a(); - const float b_f32 = _act_info.b(); - - // Initialise scale/offset for re-quantization - float s = qi_in.scale / qi_out.scale; - float o = -qi_in.offset * s + qi_out.offset; - float32x4_t vs = vdupq_n_f32(s); - float32x4_t vo = vdupq_n_f32(o); - - execute_window_loop( - win_collapsed, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const T *>(input.ptr()); - const auto output_ptr = reinterpret_cast<T *>(output.ptr()); - - wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp; - - // Compute S elements per iteration - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vin = wrapper::vloadq(input_ptr + x); - if (act == ActivationFunction::RELU) - { - // Perform activation - tmp = vmaxq_u8(vconst_0, vin); - // Re-quantize to new output space - tmp = vmlaq_qasymm8(tmp, vs, vo); - } - else if (act == ActivationFunction::BOUNDED_RELU) - { - // Perform activation - tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin)); - // Re-quantize to new output space - tmp = vmlaq_qasymm8(tmp, vs, vo); - } - else if (act == ActivationFunction::LU_BOUNDED_RELU) - { - // Perform activation - tmp = vminq_u8(va, vmaxq_u8(vb, vin)); - // Re-quantize to new output space - tmp = vmlaq_qasymm8(tmp, vs, vo); - } - else if (act == ActivationFunction::LOGISTIC) - { - // De-quantize - const auto vin_deq = vdequantize(vin, qi_in); - // Perform activation - const float32x4x4_t tmp_dep = {{ - wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( - vin_deq.val[0])))), - wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( - vin_deq.val[1])))), - wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( - vin_deq.val[2])))), - wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( - vin_deq.val[3])))), 
- }}; - // Re-quantize to new output space - tmp = vquantize(tmp_dep, qi_out); - } - else if (act == ActivationFunction::TANH) - { - // De-quantize - const auto vin_deq = vdequantize(vin, qi_in); - // Perform activation - const float32x4x4_t tmp_dep = {{ - wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))), - wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))), - wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))), - wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))), - }}; - // Re-quantize to new output space - tmp = vquantize(tmp_dep, qi_out); - } - else - { - ARM_COMPUTE_ERROR("Unsupported activation function"); - } - wrapper::vstore(output_ptr + x, tmp); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - T in = *(reinterpret_cast<const T *>(input_ptr + x)); - T tmp; - if (act == ActivationFunction::RELU) - { - tmp = std::max(const_0, in); - tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255)); - } - else if (act == ActivationFunction::BOUNDED_RELU) - { - tmp = std::min(a, std::max(const_0, in)); - tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255)); - } - else if (act == ActivationFunction::LU_BOUNDED_RELU) - { - tmp = std::min(a, std::max(b, in)); - tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255)); - } - else if (act == ActivationFunction::LOGISTIC) - { - float tmp_f = dequantize_qasymm8(in, qi_in); - tmp_f = 1.f / (1.f + std::exp(-tmp_f)); - tmp = quantize_qasymm8(tmp_f, qi_out); - } - else if (act == ActivationFunction::TANH) - { - float tmp_f = dequantize_qasymm8(in, qi_in); - tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); - tmp = quantize_qasymm8(tmp_f, qi_out); - } - else - { - ARM_COMPUTE_ERROR("Unsupported activation function"); - } - *(output_ptr + x) = tmp; - } - }, - input, output); -} - -template <ActivationLayerInfo::ActivationFunction F, typename T> -typename std::enable_if<std::is_same<T, 
qsymm16_t>::value, void>::type -NEActivationLayerKernelEx::activation(const Window &window) -{ - const int window_step_x = 16 / sizeof(T); - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - const ActivationFunction act = F; - - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); - - const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); - const auto vconst_1 = vdupq_n_f32(1.f); - const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); - const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); - const float a_f32 = _act_info.a(); - const float b_f32 = _act_info.b(); - - execute_window_loop( - win_collapsed, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const T *>(input.ptr()); - const auto output_ptr = reinterpret_cast<T *>(output.ptr()); - - wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp; - ARM_COMPUTE_UNUSED(tmp); - - // Compute S elements per iteration - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vin = wrapper::vloadq(input_ptr + x); - if (act == ActivationFunction::LOGISTIC) - { - // De-quantize - const auto vin_deq = vdequantize_int16(vin, qi_in.scale); - // Perform activation - const float32x4x2_t tmp_dep = {{ - wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( - vin_deq.val[0])))), - wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( - vin_deq.val[1])))), - }}; - // Re-quantize to new output space - tmp = vquantize_int16(tmp_dep, qi_out.scale); - } - else if (act == ActivationFunction::TANH) - { - // De-quantize - const auto 
vin_deq = vdequantize_int16(vin, qi_in.scale); - // Perform activation - const float32x4x2_t tmp_dep = {{ - wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))), - wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))), - }}; - // Re-quantize to new output space - tmp = vquantize_int16(tmp_dep, qi_out.scale); - } - else - { - ARM_COMPUTE_ERROR("Unsupported activation function"); - } - wrapper::vstore(output_ptr + x, tmp); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - T in = *(reinterpret_cast<const T *>(input_ptr + x)); - T tmp; - if (act == ActivationFunction::LOGISTIC) - { - float tmp_f = dequantize_qsymm16(in, qi_in.scale); - tmp_f = 1.f / (1.f + std::exp(-tmp_f)); - tmp = quantize_qsymm16(tmp_f, qi_out); - } - else if (act == ActivationFunction::TANH) - { - float tmp_f = dequantize_qsymm16(in, qi_in.scale); - tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); - tmp = quantize_qsymm16(tmp_f, qi_out); - } - else - { - ARM_COMPUTE_ERROR("Unsupported activation function"); - } - *(output_ptr + x) = tmp; - } - }, - input, output); -} - -Status NEActivationLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_UNUSED(act_info); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info)); - ARM_COMPUTE_RETURN_ON_ERROR( - validate_and_configure_window(input->clone().get(), - (output != nullptr) ? 
output->clone().get() : nullptr) - .first); - - return Status{}; -} - -void NEActivationLayerKernelEx::run(const Window &window, const ThreadInfo &info) -{ - // Early exit on disabled activation - if (!_act_info.enabled()) - { - return; - } - - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp deleted file mode 100644 index 32d7d6237..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/NEON/NEElementwiseOperationFuncs.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" - -#include <algorithm> -#include <arm_neon.h> -#include <map> -#include <string> - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -namespace arm_compute -{ - -template <BinaryLogicalOperation op, typename ScalarType> -inline ScalarType elementwise_logic_op_scalar(const ScalarType &a, const ScalarType &b) -{ - auto res = ScalarType(0); - - switch (op) - { - case BinaryLogicalOperation::AND: - res = a & b; - break; - case BinaryLogicalOperation::OR: - res = a | b; - break; - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } - return res; -} - -template <BinaryLogicalOperation op, typename VectorType> -inline VectorType elementwise_logic_op(const VectorType &a, const VectorType &b) -{ - VectorType res = {0, 0, 0, 0}; - - switch (op) - { - case BinaryLogicalOperation::AND: - res = wrapper::vand(a, b); - break; - case BinaryLogicalOperation::OR: - res = wrapper::vorr(a, b); - break; - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } - return res; -} - -template <BinaryLogicalOperation op> -inline uint8x16x4_t elementwise_logic_op(const uint8x16x4_t &a, const uint8x16x4_t &b) -{ - uint8x16x4_t out = {{ - elementwise_logic_op<op>(a.val[0], b.val[0]), elementwise_logic_op<op>(a.val[1], b.val[1]), - elementwise_logic_op<op>(a.val[2], b.val[2]), elementwise_logic_op<op>(a.val[3], b.val[3]), - }}; - return out; -} - -template <BinaryLogicalOperation op, typename ScalarType, typename VectorType> -inline VectorType elementwise_logic_op_broadcast(const VectorType &a, - const ScalarType &broadcast_value, - const bool reorder) -{ - VectorType 
broadcast_vector = wrapper::vdup_n(broadcast_value, wrapper::traits::vector_128_tag()); - return elementwise_logic_op<op>(reorder ? broadcast_vector : a, reorder ? a : broadcast_vector); -} - -template <BinaryLogicalOperation op, typename ScalarType, typename VectorType> -inline int elementwise_logic_op_loop(int window_start_x, int window_end_x, int window_step_x, - const ScalarType *input1_ptr, const ScalarType *input2_ptr, - ScalarType *output_ptr) -{ - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto a = wrapper::vloadq(input1_ptr + x); - const auto b = wrapper::vloadq(input2_ptr + x); - wrapper::vstore(output_ptr + x, elementwise_logic_op<op>(a, b)); - } - return x; -} - -template <BinaryLogicalOperation op, typename ScalarType, typename VectorType> -inline int elementwise_logic_op_broadcast_loop(int window_start_x, int window_end_x, - int window_step_x, - const ScalarType *non_broadcast_input_ptr, - const ScalarType &broadcast_value, - ScalarType *output_ptr, const bool reorder) -{ - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto a = wrapper::vloadq((non_broadcast_input_ptr + x)); - wrapper::vstore(output_ptr + x, - elementwise_logic_op_broadcast<op>(a, broadcast_value, reorder)); - } - return x; -} - -template <BinaryLogicalOperation op, typename ScalarType, typename VectorType> -void elementwise_logic_op(const ITensor *in1, const ITensor *in2, ITensor *out, - const Window &window) -{ - elementwise_op(in1, in2, out, window, &elementwise_logic_op_scalar<op, ScalarType>, - &elementwise_logic_op_broadcast_loop<op, ScalarType, VectorType>, - &elementwise_logic_op_loop<op, ScalarType, VectorType>); -} - -std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> configure_func( - const ITensor *input1, const ITensor *input2, ITensor *output, - std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> 
map_function) -{ - std::string function_to_call("op_"); - function_to_call += string_from_data_type(input1->info()->data_type()) + "_"; - function_to_call += string_from_data_type(input2->info()->data_type()) + "_"; - function_to_call += string_from_data_type(output->info()->data_type()); - - auto it = map_function.find(function_to_call); - - if (it != map_function.end()) - { - auto func = it->second; - return [func](const ITensor *input1, const ITensor *input2, ITensor *output, - const Window &window) { func(input1, input2, output, window); }; - } - return nullptr; -} - -template <BinaryLogicalOperation op> -std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> -configure_logic_func(const ITensor *input1, const ITensor *input2, ITensor *output) -{ - static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = { - {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}, - {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}}; - - return configure_func(input1, input2, output, map_function); -} - -void NEBinaryLogicalOperationKernel::configure(BinaryLogicalOperation op, const ITensor *input1, - const ITensor *input2, ITensor *output) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info())); - configure_common(input1, input2, output); - switch (op) - { - case BinaryLogicalOperation::AND: - _function = configure_logic_func<BinaryLogicalOperation::AND>(input1, input2, output); - break; - case BinaryLogicalOperation::OR: - _function = configure_logic_func<BinaryLogicalOperation::OR>(input1, input2, output); - break; - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } -} - -Status NEBinaryLogicalOperationKernel::validate_arguments(const ITensorInfo &input1, - const ITensorInfo &input2, - const ITensorInfo &output) -{ - // Validate in case of configured output - if (output.total_size() > 0) - { - 
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, - DataType::QASYMM8); - } - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2); - - const TensorShape out_shape = - TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - - // Validate in case of configured output - if (output.total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output.tensor_shape(), 0), - "Wrong shape for output"); - } - - return Status{}; -} - -Status NEBinaryLogicalOperationKernel::validate(BinaryLogicalOperation op, - const ITensorInfo *input1, - const ITensorInfo *input2, - const ITensorInfo *output) -{ - ARM_COMPUTE_UNUSED(op); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output)); - return Status{}; -} - -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp deleted file mode 100644 index 12017e543..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp +++ /dev/null @@ -1,343 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEMath.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/SaturateCast.h" - -#include "arm_compute/core/NEON/wrapper/wrapper.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output); - ARM_COMPUTE_RETURN_ERROR_ON(input == output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8, - DataType::S16, DataType::U16, DataType::F16, - DataType::U32, DataType::S32, DataType::F32); - - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} -} // namespace - -NECastBoolKernel::NECastBoolKernel() : _input(nullptr), _output(nullptr) {} - -void NECastBoolKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype - // must be given) - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - _input = input; - _output = output; - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICPPKernel::configure(win); -} - -Status 
NECastBoolKernel::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - return Status{}; -} - -void NECastBoolKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - ARM_COMPUTE_ERROR_ON_NULLPTR(_input, _output); - ARM_COMPUTE_ERROR_ON(_input == _output); - - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - const int window_step_x = 16; - - Window win{window}; - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win); - Iterator output(_output, win); - - const uint8_t true_val = 1; - const uint8x8_t mask_bool = vdup_n_u8(true_val); - - switch (_output->info()->data_type()) - { - case DataType::S8: - { - /* Conversion U8 -> S8 */ - execute_window_loop(win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - vst1q_s8(output_ptr + x, vreinterpretq_s8_u8(vandq_u8( - texels_u8, vdupq_n_u8(true_val)))); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) & true_val); - } - }, - input, output); - break; - } - case DataType::S16: - { - /* Up-conversion U8 -> S16 */ - execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 
= vld1q_u8(input_ptr + x); - - const int16x8x2_t texels = { - {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), - vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; - - vst1q_s16(output_ptr + x, texels.val[0]); - vst1q_s16(output_ptr + x + 8, texels.val[1]); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast<int32_t>(*(input_ptr + x) & true_val); - } - }, - input, output); - break; - } - case DataType::S32: - { - /* Up-conversion U8 -> S32 */ - execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<int32_t *>(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - const int16x8x2_t texels = { - {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), - vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; - - vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0]))); - vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0]))); - vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1]))); - vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1]))); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast<uint32_t>(*(input_ptr + x) & true_val); - } - }, - input, output); - break; - } - case DataType::F32: - { - /* Up-conversion U8 -> F32 */ - execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<float *>(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + 
x); - - const int16x8x2_t texels = { - {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), - vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; - vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0])))); - vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0])))); - vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1])))); - vst1q_f32(output_ptr + x + 12, - vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1])))); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - auto in = static_cast<uint32_t>(*(input_ptr + x) & true_val); - *(output_ptr + x) = static_cast<float>(in); - } - }, - input, output); - break; - } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - { - /* Up-conversion U8 -> F16 */ - execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - const int16x8x2_t texels = { - {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), - vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; - vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0])); - vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1])); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) & true_val); - } - }, - input, output); - break; - } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::U8: - { - /* Conversion U8 -> S8 */ - execute_window_loop(win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr()); - const auto output_ptr = 
reinterpret_cast<uint8_t *>(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - vst1q_u8(output_ptr + x, vandq_u8(texels_u8, vdupq_n_u8(true_val))); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) & true_val); - } - }, - input, output); - break; - } - case DataType::U16: - { - /* Up-conversion U8 -> U16 */ - execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr()); - const auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)), - vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}}; - - vst1q_u16(output_ptr + x, texels.val[0]); - vst1q_u16(output_ptr + x + 8, texels.val[1]); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast<uint16_t>(*(input_ptr + x) & true_val); - } - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } -} diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp deleted file mode 100644 index 091d38c56..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -using namespace arm_compute; - -NEEmbeddingLookupKernel::NEEmbeddingLookupKernel() - : _input(nullptr), _lookups(nullptr), _output(nullptr) -{ -} - -void NEEmbeddingLookupKernel::configure(const ITensor *input, ITensor *output, - const ITensor *lookups) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), lookups->info())); - - _input = input; - _output = output; - _lookups = lookups; - - // Auto initialize output if not initialized - auto out_shape = input->info()->tensor_shape(); - out_shape.set(out_shape.num_dimensions() - 1, lookups->info()->num_dimensions()); - auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - - INEKernel::configure(calculate_max_window(*output->info())); -} - -Status NEEmbeddingLookupKernel::validate(const arm_compute::ITensorInfo *input, - const arm_compute::ITensorInfo *output, - const arm_compute::ITensorInfo *lookups) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); - - ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1); - - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(input->num_dimensions() != 
output->num_dimensions()); - ARM_COMPUTE_ERROR_ON(output->dimension(output->num_dimensions() - 1) != lookups->dimension(0)); - for (size_t i = 0; i < output->num_dimensions() - 1; ++i) - { - ARM_COMPUTE_ERROR_ON(input->dimension(i) != output->dimension(i)); - } - } - - return Status{}; -} - -void NEEmbeddingLookupKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - const size_t lookup_dim = _output->info()->num_dimensions() - 1; - - Window output_window{window}; - output_window.set(Window::DimX, - Window::Dimension(output_window.x().start(), output_window.x().end(), - _input->info()->dimension(0))); - - Window out_slice = output_window.first_slice_window_4D(); - do - { - Iterator output_it(_output, out_slice); - - execute_window_loop(out_slice, - [&](const Coordinates &id) { - const int32_t lookup = *reinterpret_cast<int32_t *>( - _lookups->ptr_to_element(Coordinates{id[lookup_dim]})); - Coordinates input_id{id}; - input_id.set(lookup_dim, lookup); - memcpy(output_it.ptr(), _input->ptr_to_element(input_id), - _output->info()->dimension(0) * _output->info()->element_size()); - }, - output_it); - - } while (window.slide_window_slice_4D(out_slice)); -} diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp deleted file mode 100644 index 93963a504..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" - -namespace arm_compute -{ -namespace -{ -/** Validate the indices - * - * Validate that indices are not negative - * - * @param[in] indices Indices tensor info. - */ -template <typename U> void validate_indices(const ITensor *indices) -{ - for (size_t i = 0; i < indices->info()->tensor_shape()[0]; ++i) - { - ARM_COMPUTE_ERROR_ON(*(reinterpret_cast<U *>(indices->ptr_to_element(Coordinates(i)))) < 0); - } -} - -} // namespace - -NEGatherKernelEx::NEGatherKernelEx() - : _input{}, _indices{}, _axis{}, _indices_rank{}, _output{}, _func{} -{ -} - -template <typename U> -inline void NEGatherKernelEx::gather_0_axis(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - - // Validate that the indices are not negative - validate_indices<U>(_indices); - - Iterator output_it(_output, window); - execute_window_loop( - window, - [&](const Coordinates &id) { - Coordinates gather_id(id); - gather_id.collapse(_indices_rank); - - U new_index; - switch (_indices_rank) - { - case 1: - new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0])))); - break; - case 2: - new_index = - *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1])))); - break; - case 3: - new_index = *( - reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2])))); - break; - default: - ARM_COMPUTE_ERROR("Wrong num of dimensions"); - break; - } - - gather_id.set(0, new_index); - - std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(), - 
output_it.ptr()); - }, - output_it); -} - -template <typename U> -void NEGatherKernelEx::gather_n_axis(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - - // Validate that the indices are not negative - validate_indices<U>(_indices); - - Window output_window{window}; - output_window.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator output_it(_output, output_window); - execute_window_loop( - output_window, - [&](const Coordinates &id) { - Coordinates gather_id(id); - gather_id.collapse(_indices_rank, _axis); - - U new_index; - switch (_indices_rank) - { - case 1: - new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis])))); - break; - case 2: - new_index = *(reinterpret_cast<U *>( - _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1])))); - break; - case 3: - new_index = *(reinterpret_cast<U *>( - _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2])))); - break; - default: - ARM_COMPUTE_ERROR("Wrong num of dimensions"); - break; - } - - gather_id.set(_axis, new_index); - - std::copy_n(_input->ptr_to_element(gather_id), - _input->info()->dimension(0) * _output->info()->element_size(), - output_it.ptr()); - }, - output_it); -} - -void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, - int axis) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); - ARM_COMPUTE_ERROR_ON(indices->info()->num_dimensions() > 3); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - - _input = input; - _indices = indices; - _output = output; - _axis = axis; - _indices_rank = indices->info()->num_dimensions(); - - if (_axis < 0) - { - _axis += input->info()->num_dimensions(); - } - ARM_COMPUTE_ERROR_ON(0 > 
_axis || _axis >= static_cast<int32_t>(input->info()->num_dimensions())); - - if (0 == _axis) - { - switch (_indices->info()->data_type()) - { - case DataType::U32: - _func = &NEGatherKernelEx::gather_0_axis<uint32_t>; - break; - case DataType::S32: - _func = &NEGatherKernelEx::gather_0_axis<int32_t>; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - } - else - { - switch (_indices->info()->data_type()) - { - case DataType::U32: - _func = &NEGatherKernelEx::gather_n_axis<uint32_t>; - break; - case DataType::S32: - _func = &NEGatherKernelEx::gather_n_axis<int32_t>; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - } - // Output auto initialization if not yet initialized - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex( - input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis); - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); - - // Create window - Window win = calculate_max_window(*output->info(), Steps()); - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -Status NEGatherKernelEx::validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); - ARM_COMPUTE_RETURN_ERROR_ON(indices->num_dimensions() > 3); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(input->num_dimensions() + indices->num_dimensions() - 1 > 4); - - if (axis < 0) - { - axis += input->num_dimensions(); - } - - ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast<int32_t>(input->num_dimensions())); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, 
DataType::F16, DataType::F32); - - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex( - input->tensor_shape(), indices->tensor_shape(), axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); - - return Status{}; -} - -void NEGatherKernelEx::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window, info); -} - -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp deleted file mode 100644 index 30787c0a4..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include <unordered_map> - -using namespace arm_compute; - -namespace -{ -constexpr size_t NOT_HIT = 0xFFFFFFFF; -} // namespace - -NEHashtableLookupKernel::NEHashtableLookupKernel() - : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr} -{ -} - -void NEHashtableLookupKernel::configure(const ITensor *lookups, const ITensor *keys, - const ITensor *input, ITensor *output, ITensor *hits) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits); - ARM_COMPUTE_ERROR_THROW_ON( - validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info())); - - _lookups = lookups; - _keys = keys; - _input = input; - _output = output; - _hits = hits; - - // Auto initialize output if not initialized - auto out_shape{input->info()->tensor_shape()}; - out_shape.set(out_shape.num_dimensions() - 1, lookups->info()->num_dimensions(), false); - auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - - // Auto initialize hits if not initialized - auto_init_if_empty(*hits->info(), lookups->info()->tensor_shape(), 1, DataType::U8); - - INEKernel::configure(calculate_max_window(*output->info())); -} - -Status NEHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - 
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32); - - ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(keys->num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(keys->dimension(0) != input->dimension(input->num_dimensions() - 1)); - - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(input->num_dimensions() != output->num_dimensions()); - ARM_COMPUTE_ERROR_ON(output->dimension(output->num_dimensions() - 1) != lookups->dimension(0)); - for (size_t i = 0; i < output->num_dimensions() - 1; ++i) - { - ARM_COMPUTE_ERROR_ON(input->dimension(i) != output->dimension(i)); - } - } - - // Validate in case of configured hits - if (hits->total_size() > 0) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(hits, 1, DataType::U8, DataType::QASYMM8); - ARM_COMPUTE_ERROR_ON(hits->dimension(0) != output->dimension(output->num_dimensions() - 1)); - ARM_COMPUTE_ERROR_ON(hits->dimension(0) != lookups->dimension(0)); - ARM_COMPUTE_ERROR_ON(hits->num_dimensions() > 1); - } - - return Status{}; -} - -void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - const size_t lookup_dim = _output->info()->num_dimensions() - 1; - const int const_0 = _output->info()->data_type() == DataType::QASYMM8 - ? 
_output->info()->quantization_info().uniform().offset - : 0; - - std::unordered_map<int32_t, size_t> key_index_map; - for (size_t n = 0; n < _keys->info()->dimension(0); ++n) - { - const int32_t key = *reinterpret_cast<int32_t *>(_keys->ptr_to_element({n})); - key_index_map[key] = n; - } - std::vector<size_t> lookup_indices; - for (size_t k = 0; k < _lookups->info()->dimension(0); ++k) - { - const int32_t key = *reinterpret_cast<int32_t *>(_lookups->ptr_to_element({k})); - const auto it = key_index_map.find(key); - if (it == key_index_map.end()) - { - lookup_indices.emplace_back(NOT_HIT); - *_hits->ptr_to_element({k}) = 0; - } - else - { -#if defined(ARM_COMPUTE_DEBUG_ENABLED) - if (it->second >= _keys->info()->dimension(0)) - ARM_COMPUTE_ERROR("HashTable Lookup: Index out of bounds."); -#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) - lookup_indices.emplace_back(it->second); - *_hits->ptr_to_element({k}) = 1; - } - } - - Window output_window{window}; - output_window.set(Window::DimX, - Window::Dimension(output_window.x().start(), output_window.x().end(), - _input->info()->dimension(0))); - - Window out_slice = output_window.first_slice_window_4D(); - do - { - Iterator output_it(_output, out_slice); - - execute_window_loop(out_slice, - [&](const Coordinates &id) { - const auto lookup = lookup_indices.at(id[lookup_dim]); - if (lookup == NOT_HIT) - { - memset(output_it.ptr(), const_0, - _output->info()->dimension(0) * _output->info()->element_size()); - } - else - { - Coordinates input_id{id}; - input_id.set(lookup_dim, lookup); - memcpy(output_it.ptr(), _input->ptr_to_element(input_id), - _output->info()->dimension(0) * _output->info()->element_size()); - } - - }, - output_it); - - } while (window.slide_window_slice_4D(out_slice)); -} diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp deleted file mode 100644 index 49adf1462..000000000 
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEMath.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace -{ -template <typename T> -void instance_normalization_nchw(ITensor *input, ITensor *output, ITensor *gamma, ITensor *beta, - float epsilon, const Window &window) -{ - /** NEON vector tag type. */ - using ExactTagType = - typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>; - - // Clear X/Y dimensions on execution window as we handle the planes manually - Window win = window; - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - win.set(Window::DimY, Window::Dimension(0, 1, 1)); - - constexpr int window_step_x = 16 / sizeof(T); - const unsigned int elements_plane = input->info()->dimension(0) * output->info()->dimension(1); - const auto channel_idx = - get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL); - - Iterator input_it(input, win); - execute_window_loop( - win, - [&](const Coordinates &id) { - Window win_plane = window; - win_plane.set(Window::DimX, Window::Dimension(0, 1, 1)); - win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1)); - win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1)); - - Iterator input_plane_it(input, win_plane); - Iterator output_plane_it(output, win_plane); - - auto sum_h_w = static_cast<T>(0.f); - auto sum_squares_h_w = static_cast<T>(0.f); - - execute_window_loop( - win_plane, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast<const T *>(input_plane_it.ptr()); - - auto 
vec_sum_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}); - auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}); - - // Compute S elements per iteration - int x = window.x().start(); - for (; x <= (window.x().end() - window_step_x); x += window_step_x) - { - auto vec_input_val = wrapper::vloadq(input_ptr + x); - vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val); - vec_sum_squares_h_w = - wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val)); - } - - auto vec2_sum_h_w = - wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w)); - auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w), - wrapper::vgetlow(vec_sum_squares_h_w)); - for (int i = 0; i < window_step_x / 4; ++i) - { - vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w); - vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w); - } - sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0); - sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0); - - // Compute left-over elements - for (; x < window.x().end(); ++x) - { - const auto value = *(input_ptr + x); - sum_h_w += value; - sum_squares_h_w += value * value; - } - }, - input_plane_it, output_plane_it); - - const auto mean_h_w = sum_h_w / elements_plane; - const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w; - - auto gamma_val = 1.0f; - if (gamma != nullptr) - { - gamma_val = *reinterpret_cast<T *>(gamma->ptr_to_element({id[channel_idx]})); - } - const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon); - const auto vec_mean_h_w = wrapper::vdup_n(static_cast<T>(mean_h_w), ExactTagType{}); - const auto vec_multip_h_w = wrapper::vdup_n(static_cast<T>(multip_h_w), ExactTagType{}); - auto beta_val = 0.0f; - if (beta != nullptr) - { - beta_val = *reinterpret_cast<T *>(beta->ptr_to_element({id[channel_idx]})); - } - const auto vec_beta = wrapper::vdup_n(static_cast<T>(beta_val), 
ExactTagType{}); - - execute_window_loop( - win_plane, - [&](const Coordinates &) { - auto input_ptr = reinterpret_cast<T *>(input_plane_it.ptr()); - auto output_ptr = reinterpret_cast<T *>(output_plane_it.ptr()); - - // Compute S elements per iteration - int x = window.x().start(); - auto vec_val = wrapper::vdup_n(static_cast<T>(0.0f), ExactTagType{}); - for (; x <= (window.x().end() - window_step_x); x += window_step_x) - { - vec_val = wrapper::vloadq(input_ptr + x); - vec_val = wrapper::vadd( - wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta); - wrapper::vstore(output_ptr + x, vec_val); - } - - // Compute left-over elements - for (; x < window.x().end(); ++x) - { - *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val; - } - }, - input_plane_it, output_plane_it); - }, - input_it); -} - -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma, const ITensorInfo *beta, float epsilon) -{ - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(epsilon == 0.f, "Epsilon must be different than 0"); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() == DataLayout::NHWC, - "NHWC data layout is not supported by the kernel directly"); - - if (output != nullptr && output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), - "Input and output have different number of channels"); - } - - if (gamma != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index( - input->data_layout(), 
DataLayoutDimension::CHANNEL)) != - gamma->dimension(0), - "Gamma's size must be the same as size of input's channel"); - } - - if (beta != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index( - input->data_layout(), DataLayoutDimension::CHANNEL)) != - beta->dimension(0), - "Beta's size must be the same as size of input's channel"); - } - - return Status{}; -} - -std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - // We handle the planes manually - Window win = calculate_max_window(*input, Steps(1)); - - // Output auto initialization if not yet initialized - auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type()); - - // NEInstanceNormalizationLayerKernelEx doesn't need padding so update_window_and_padding() can be - // skipped - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(ValidRegion(coord, output->tensor_shape())); - return std::make_pair(Status{}, win); -} -} // namespace - -NEInstanceNormalizationLayerKernelEx::NEInstanceNormalizationLayerKernelEx() - : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), - _epsilon(1e-12) -{ -} - -void NEInstanceNormalizationLayerKernelEx::configure(ITensor *input, ITensor *output, - ITensor *gamma, ITensor *beta, float epsilon) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _input = input; - _output = output == nullptr ? 
input : output; - _gamma = gamma; - _beta = beta; - _epsilon = epsilon; - - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon)); - - if (_input->info()->data_type() == DataType::F32) - { - _func = &instance_normalization_nchw<float>; - } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - else if (_input->info()->data_type() == DataType::F16) - { - _func = &instance_normalization_nchw<float16_t>; - } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - else - { - ARM_COMPUTE_ERROR("Unsupported data type"); - } - - // Configure kernel window - auto win_config = validate_and_configure_window(_input->info(), _output->info()); - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - - INEKernel::configure(std::get<1>(win_config)); -} - -Status NEInstanceNormalizationLayerKernelEx::validate(const ITensorInfo *input, - const ITensorInfo *output, - const ITensorInfo *gamma, - const ITensorInfo *beta, float epsilon) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon)); - ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window( - input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get())))); - return Status{}; -} - -void NEInstanceNormalizationLayerKernelEx::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - (*_func)(_input, _output, _gamma, _beta, _epsilon, window); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp deleted file mode 100644 index b92130cec..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "arm_compute/core/CPP/Validate.h" - -#include <arm_neon.h> - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor, - const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output); - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1)); - - // Checks performed when output is configured - if ((output->total_size() != 0)) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -inline int32x4x4_t load_value(const int32_t *input_ptr) -{ - return {wrapper::vloadq(input_ptr), wrapper::vloadq(input_ptr + 4), - wrapper::vloadq(input_ptr + 8), wrapper::vloadq(input_ptr + 12)}; -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -inline const float32x4x4_t load_value(const float16_t *input_ptr) -{ - return {vcvt_f32_f16(wrapper::vload(input_ptr)), 
vcvt_f32_f16(wrapper::vload(input_ptr + 4)), - vcvt_f32_f16(wrapper::vload(input_ptr + 8)), - vcvt_f32_f16(wrapper::vload(input_ptr + 12))}; -} - -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -template <typename T> inline void store_result(T *ptr, const float32x4x4_t &v) -{ - ARM_COMPUTE_UNUSED(ptr, v); -} - -template <> inline void store_result<float>(float *ptr, const float32x4x4_t &v) -{ - wrapper::vstore(ptr, v.val[0]); - wrapper::vstore(ptr + 4, v.val[1]); - wrapper::vstore(ptr + 8, v.val[2]); - wrapper::vstore(ptr + 12, v.val[3]); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template <> inline void store_result<float16_t>(float16_t *ptr, const float32x4x4_t &v) -{ - wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1]))); - wrapper::vstore(ptr + 8, vcombine_f16(vcvt_f16_f32(v.val[2]), vcvt_f16_f32(v.val[3]))); -} -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -inline float32x4x4_t multiply_scale_vec(const int32x4x4_t &iv, float scale) -{ - const float32x4_t vscale = vdupq_n_f32(scale); - - const float32x4x4_t ret = {{ - vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale), - vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale), - }}; - return ret; -} -} // namespace - -NEMultiplyScaleFactorKernel::NEMultiplyScaleFactorKernel() - : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f) -{ -} - -void NEMultiplyScaleFactorKernel::configure(const ITensor *input, const ITensor *scale_factor, - ITensor *output, float multiplier) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), scale_factor->info(), output->info())); - - _input = input; - _scale_factor = scale_factor; - _output = output; - _multiplier = multiplier; - - // Configure kernel window - Window win_config = calculate_max_window(*input->info(), Steps()); - - Coordinates coord; - 
coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - INEKernel::configure(win_config); -} - -Status NEMultiplyScaleFactorKernel::validate(const ITensorInfo *input, - const ITensorInfo *scale_factor, - const ITensorInfo *output, float multiplier) -{ - ARM_COMPUTE_UNUSED(multiplier); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output)); - - return Status{}; -} - -template <typename T> void NEMultiplyScaleFactorKernel::multiply(const Window &window) -{ - constexpr auto window_step = 16; - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - - // Collapse window and reset first dimension to handle tail calculations manually - // Support Only 2D input - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - execute_window_loop( - win_collapsed, - [&](const Coordinates &id) { - auto scale = *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})); - scale *= _multiplier; - - const auto input_ptr = reinterpret_cast<const int32_t *>(input.ptr()); - auto output_ptr = reinterpret_cast<T *>(output.ptr()); - int x = window_start_x; - for (; x <= (window_end_x - window_step); x += window_step) - { - store_result<float>(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale)); - } - // Compute left-over elements - for (; x < window_end_x; ++x) - { - output_ptr[x] = input_ptr[x] * scale; - } - }, - input, output); -} - -void NEMultiplyScaleFactorKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - switch (_output->info()->data_type()) - 
{ - case DataType::F32: - NEMultiplyScaleFactorKernel::multiply<float>(window); - break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - NEMultiplyScaleFactorKernel::multiply<float16_t>(window); - break; -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } -} diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp deleted file mode 100644 index 0a11eb509..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h" -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -namespace arm_compute -{ -namespace -{ -/** Validate the depth - * - * Validate that depth are not negative - * - * @param[in] depth Depth tensor. - * @param[in] output Output tensor. - * @param[in] axis Axis of depth. 
- */ -template <typename U> void validate_depth(const ITensor *depth, const ITensor *output, int axis) -{ - ARM_COMPUTE_ERROR_ON(*(reinterpret_cast<U *>(depth->buffer())) < 0); - ARM_COMPUTE_ERROR_ON(static_cast<U>(output->info()->tensor_shape()[axis]) != - *(reinterpret_cast<U *>(depth->buffer()))); -} - -Status validate_arguments(const ITensorInfo *indices, const ITensorInfo *depth, - const ITensorInfo *on_value, const ITensorInfo *off_value, - const ITensorInfo *output, int axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(indices, depth, on_value, off_value, output); - const int actual_axis = wrap_around(axis, static_cast<int>(output->num_dimensions())); - ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4); - ARM_COMPUTE_RETURN_ERROR_ON(on_value->tensor_shape().total_size() != 1); - ARM_COMPUTE_RETURN_ERROR_ON(0 > actual_axis || - actual_axis >= static_cast<int>(output->num_dimensions())); - ARM_COMPUTE_RETURN_ERROR_ON(on_value->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(on_value, 1, DataType::U8, DataType::S8, - DataType::U16, DataType::S16, DataType::F16, - DataType::U32, DataType::S32, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, off_value); - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, output); - } - - return Status{}; -} - -template <typename U, typename Enable = void> bool isOnValue(U) { return true; } - -template <typename U, std::enable_if_t<std::is_integral<U>::value, int> = 0> -bool isOnValue(U index, U depth) -{ - return index >= 0 && index < depth; -} -} // namespace - -NEOneHotKernel::NEOneHotKernel() - : _indices{nullptr}, _depth{nullptr}, _on_value{nullptr}, _off_value{nullptr}, _axis{-1}, - _output{nullptr}, _func{} -{ -} - -template <typename U> -void NEOneHotKernel::onehot_0_axis(const Window &window, const 
ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - // Validate that the depth are not negative - validate_depth<U>(_depth, _output, _axis); - Window output_window{window}; - output_window.set(Window::DimX, Window::Dimension(0, 1, 1)); - Iterator output_it(_output, output_window); - const U off_value = *reinterpret_cast<U *>(_off_value->buffer()); - execute_window_loop( - output_window, - [&](const Coordinates &id) { - std::fill_n(output_it.ptr(), - _output->info()->dimension(0) * _output->info()->element_size(), off_value); - Coordinates indices_id(id); - indices_id.remove(0); - const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id))); - if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer())))) - { - Coordinates onehot_id(id); - onehot_id.set(0, new_index); - std::copy_n(_on_value->buffer(), _output->info()->element_size(), - _output->ptr_to_element(onehot_id)); - } - }, - output_it); -} - -template <typename U> -inline void NEOneHotKernel::onehot_n_axis(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - // Validate that the indices are not negative - validate_depth<U>(_depth, _output, _axis); - Iterator output_it(_output, window); - execute_window_loop(window, - [&](const Coordinates &id) { - Coordinates indices_id(id); - indices_id.remove(_axis); - const U new_index = - *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id))); - if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer())))) - { - Coordinates onehot_id(id); - onehot_id.set(_axis, new_index); - std::copy_n(static_cast<U>(id[_axis]) == new_index ? 
_on_value->buffer() - : _off_value->buffer(), - _output->info()->element_size(), output_it.ptr()); - } - }, - output_it); -} - -void NEOneHotKernel::configure(const ITensor *indices, const ITensor *depth, - const ITensor *on_value, const ITensor *off_value, ITensor *output, - int axis) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(indices, depth, on_value, off_value, output); - ARM_COMPUTE_ERROR_ON(output->info()->total_size() == 0); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(indices->info(), depth->info(), on_value->info(), - off_value->info(), output->info(), axis)); - _indices = indices; - _depth = depth; - _on_value = on_value; - _off_value = off_value; - _output = output; - _axis = wrap_around(axis, static_cast<int>(output->info()->num_dimensions())); - if (0 == _axis) - { - switch (_indices->info()->data_type()) - { - case DataType::U32: - _func = &NEOneHotKernel::onehot_0_axis<uint32_t>; - break; - case DataType::S32: - _func = &NEOneHotKernel::onehot_0_axis<int32_t>; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - } - else - { - switch (_indices->info()->data_type()) - { - case DataType::U32: - _func = &NEOneHotKernel::onehot_n_axis<uint32_t>; - break; - case DataType::S32: - _func = &NEOneHotKernel::onehot_n_axis<int32_t>; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - } - // Create window - Window win = calculate_max_window(*output->info(), Steps()); - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - INEKernel::configure(win); -} - -Status NEOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *depth, - const ITensorInfo *on_value, const ITensorInfo *off_value, - const ITensorInfo *output, int axis) -{ - ARM_COMPUTE_RETURN_ON_ERROR( - validate_arguments(indices, depth, on_value, off_value, output, axis)); - return Status{}; -} - -void NEOneHotKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - 
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - (this->*_func)(window, info); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp deleted file mode 100644 index 5841f1d69..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "arm_compute/core/CPP/Validate.h" - -#include <arm_neon.h> - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *scale_factor) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1)); - - return Status{}; -} - -inline float32x4x4_t load_value(const 
float *input_ptr) -{ - return {wrapper::vloadq(input_ptr), wrapper::vloadq(input_ptr + 4), - wrapper::vloadq(input_ptr + 8), wrapper::vloadq(input_ptr + 12)}; -} -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -inline const float32x4x4_t load_value(const float16_t *input_ptr) -{ - return {vcvt_f32_f16(wrapper::vload(input_ptr)), vcvt_f32_f16(wrapper::vload(input_ptr + 4)), - vcvt_f32_f16(wrapper::vload(input_ptr + 8)), - vcvt_f32_f16(wrapper::vload(input_ptr + 12))}; -} - -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -inline float32x4_t round(const float32x4_t &fv) -{ - const float32x4_t point5_f32x4 = vdupq_n_f32(0.5f); - const float32x4_t zero_f32x4 = vdupq_n_f32(0.0f); - // If value < 0, mask = -1, else mask = 0 - int32x4_t mask_less_zero_ui32x4 = reinterpret_cast<int32x4_t>(vcltq_f32(fv, zero_f32x4)); - return vaddq_f32(fv, vaddq_f32(vcvtq_f32_s32(mask_less_zero_ui32x4), point5_f32x4)); -} - -inline int8x16_t vquantizeSymm(const float32x4x4_t &fv, float scale_factor_inv, int32_t max_scale) -{ - const float32x4_t vinvscale = vdupq_n_f32(scale_factor_inv); - const int32x4_t vposend = vdupq_n_s32(max_scale); - const int32x4_t vnagend = vdupq_n_s32(-max_scale); - - const int32x4x4_t rf = {{ -#ifdef __aarch64__ - vminq_s32(vposend, - vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))), - vminq_s32(vposend, - vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))), - vminq_s32(vposend, - vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))), - vminq_s32(vposend, - vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))), -#else //__aarch64__ - vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))), - vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))), - vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))), - vminq_s32(vposend, vmaxq_s32(vnagend, 
vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))), -#endif //__aarch64__ - }}; - const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]))); - const int8x8_t pb = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3]))); - return vcombine_s8(pa, pb); -} -} // namespace - -NEQuantizationSymmetricKernel::NEQuantizationSymmetricKernel() - : _input(nullptr), _output(nullptr), _scale_factor(nullptr) -{ -} - -void NEQuantizationSymmetricKernel::configure(const ITensor *input, ITensor *output, - ITensor *scale_factor) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), output->info(), scale_factor->info())); - - _input = input; - _output = output; - _scale_factor = scale_factor; - - // Configure kernel window - Window win_config = calculate_max_window(*input->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - INEKernel::configure(win_config); -} - -Status NEQuantizationSymmetricKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *scale_factor) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, scale_factor)); - - return Status{}; -} - -template <typename T> void NEQuantizationSymmetricKernel::quantize(const Window &window) -{ - constexpr auto window_step = 16; - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - -#ifdef __aarch64__ - constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; -#else //__aarch64__ - constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP; -#endif //__aarch64__ - - // Collapse window and reset first dimension to handle tail calculations manually - // Support Only 2D input - Window win_collapsed = window; - Iterator 
input(_input, win_collapsed); - Iterator output(_output, win_collapsed); - const auto dim_x = _input->info()->dimension(0); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - execute_window_loop( - win_collapsed, - [&](const Coordinates &id) { - const auto start = reinterpret_cast<const T *>(input.ptr()); - const auto min_max = std::minmax_element(start, start + dim_x); - const auto int8_scale = 127; - auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second)); - if (range == 0) - { - *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = 1; - range = 1; - } - else - { - *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale; - } - const auto scale_factor_inv = int8_scale / range; - - auto input_ptr = reinterpret_cast<const T *>(input.ptr()); - auto output_ptr = reinterpret_cast<int8_t *>(output.ptr()); - int x = window_start_x; - for (; x <= (window_end_x - window_step); x += window_step) - { - wrapper::vstore(&output_ptr[x], - vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale)); - } - // Compute left-over elements - for (; x < window_end_x; ++x) - { - int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy); - quantized = std::min(int8_scale, std::max(quantized, -int8_scale)); - output_ptr[x] = static_cast<int8_t>(quantized); - } - }, - input, output); -} - -void NEQuantizationSymmetricKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - switch (_input->info()->data_type()) - { - case DataType::F32: - NEQuantizationSymmetricKernel::quantize<float>(window); - break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - NEQuantizationSymmetricKernel::quantize<float16_t>(window); - break; -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - default: - 
ARM_COMPUTE_ERROR("Unsupported data type."); - } -} diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp deleted file mode 100644 index 3b65eac10..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp +++ /dev/null @@ -1,693 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/NEMath.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" - -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace -{ -// Helper function to calculate the minimum value of the input vector. All the elements in the -// output vector contain the min value. -float32x2_t calculate_min(float32x4_t in) -{ - auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in)); - return wrapper::vpmin(pmin, pmin); -} - -// Helper function to calculate the maximum value of the input vector. All the elements in the -// output vector contain the max value. -float32x2_t calculate_max(float32x4_t in) -{ - auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in)); - return wrapper::vpmax(pmax, pmax); -} -// Helper function to calculate the minimum value of the input vector. All the elements in the -// output vector contain the min value. -int32x2_t calculate_min(int32x4_t in) -{ - auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in)); - return wrapper::vpmin(pmin, pmin); -} - -// Helper function to calculate the maximum value of the input vector. All the elements in the -// output vector contain the max value. 
-int32x2_t calculate_max(int32x4_t in) -{ - auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in)); - return wrapper::vpmax(pmax, pmax); -} - -// Helper function to calculate the minimum value of the input vector. All the elements in the -// output vector contain the min value. -inline uint8x8_t calculate_min(uint8x16_t in) -{ - auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in)); - pmin = wrapper::vpmin(pmin, pmin); - pmin = wrapper::vpmin(pmin, pmin); - return wrapper::vpmin(pmin, pmin); -} -// Helper function to calculate the maximum value of the input vector. All the elements in the -// output vector contain the max value. -inline uint8x8_t calculate_max(uint8x16_t in) -{ - auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in)); - pmax = wrapper::vpmax(pmax, pmax); - pmax = wrapper::vpmax(pmax, pmax); - return wrapper::vpmax(pmax, pmax); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -// Helper function to calculate the minimum value of the input vector. All the elements in the -// output vector contain the min value. -inline float16x4_t calculate_min(float16x8_t in) -{ - auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in)); - pmin = wrapper::vpmin(pmin, pmin); - return wrapper::vpmin(pmin, pmin); -} -// Helper function to calculate the maximum value of the input vector. All the elements in the -// output vector contain the max value. 
-inline float16x4_t calculate_max(float16x8_t in) -{ - auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in)); - pmax = wrapper::vpmax(pmax, pmax); - return wrapper::vpmax(pmax, pmax); -} -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -template <class F> class Reducer -{ -public: - static void reduceX(const Window &window, const ITensor *input, ITensor *output, F f, - const ReduceOperation op) - { - // Set out window - Window out_window(window); - out_window.set(Window::DimX, Window::Dimension(0, 0, 0)); - - // Get first input and output slices - Window in_slice = window.first_slice_window_1D(); - Window out_slice = out_window.first_slice_window_1D(); - - do - { - Iterator in(input, in_slice); - Iterator out(output, out_slice); - - f(in, out, in_slice, out_slice, *input->info(), op); - } while (window.slide_window_slice_1D(in_slice) && out_window.slide_window_slice_1D(out_slice)); - } - static void reduceY(const Window &window, const ITensor *input, ITensor *output, F f, - const ReduceOperation op) - { - // Set in window - Window in_window(window); - Window out_window(window); - - in_window.set(Window::DimY, Window::Dimension(0, 1, 1)); - out_window.set(Window::DimY, Window::Dimension(0, output->info()->dimension(1), - output->info()->dimension(1))); - - // Get first input and output slices - Window in_slice = in_window.first_slice_window_2D(); - Window out_slice = out_window.first_slice_window_2D(); - - do - { - Iterator in(input, in_slice); - Iterator out(output, out_slice); - - f(in, out, in_slice, out_slice, *input->info(), 1, op); - } while (in_window.slide_window_slice_2D(in_slice) && - out_window.slide_window_slice_2D(out_slice)); - } - static void reduceZ(const Window &window, const ITensor *input, ITensor *output, F f, - const ReduceOperation op) - { - // Set in window - Window in_window(window); - Window out_window(window); - - in_window.set(Window::DimZ, Window::Dimension(0, 1, 1)); - out_window.set(Window::DimZ, Window::Dimension(0, 
output->info()->dimension(2), - output->info()->dimension(2))); - - // Get first input and output slices - Window in_slice = in_window.first_slice_window_3D(); - Window out_slice = out_window.first_slice_window_3D(); - - do - { - Iterator in(input, in_slice); - Iterator out(output, out_slice); - - f(in, out, in_slice, out_slice, *input->info(), 2, op); - } while (in_window.slide_window_slice_3D(in_slice) && - out_window.slide_window_slice_3D(out_slice)); - } - static void reduceW(const Window &window, const ITensor *input, ITensor *output, F f, - const ReduceOperation op) - { - // Set in/out window - Window in_window(window); - Window out_window(window); - - in_window.set(3, Window::Dimension(0, 1, 1)); - out_window.set(3, Window::Dimension(0, 1, 1)); - - // Get first input and output slices - Window in_slice = in_window.first_slice_window_4D(); - Window out_slice = out_window.first_slice_window_4D(); - - do - { - Iterator in(input, in_slice); - Iterator out(output, out_slice); - - f(in, out, in_slice, out_slice, *input->info(), 3, op); - } while (in_window.slide_window_slice_4D(in_slice) && - out_window.slide_window_slice_4D(out_slice)); - } -}; - -template <typename T, int S> struct RedOpX -{ - /** NEON vector tag type. 
*/ - using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; - - inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, - const TensorInfo &in_info, const ReduceOperation op) - { - ARM_COMPUTE_UNUSED(out_slice); - ARM_COMPUTE_UNUSED(in_info); - auto init_res_value = static_cast<T>(0.f); - switch (op) - { - case ReduceOperation::MIN: - case ReduceOperation::MAX: - { - init_res_value = *reinterpret_cast<T *>(input.ptr()); - break; - } - default: - break; - } - auto vec_res_value = wrapper::vdup_n(init_res_value, ExactTagType{}); - - execute_window_loop(in_slice, - [&](const Coordinates &) { - const auto in_ptr = reinterpret_cast<const T *>(input.ptr()); - const auto vec_elements = wrapper::vloadq(in_ptr); - - switch (op) - { - case ReduceOperation::MIN: - { - vec_res_value = wrapper::vmin(vec_elements, vec_res_value); - break; - } - case ReduceOperation::MAX: - { - vec_res_value = wrapper::vmax(vec_elements, vec_res_value); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - } - }, - input); - - switch (op) - { - case ReduceOperation::MIN: - { - *(reinterpret_cast<T *>(output.ptr())) = wrapper::vgetlane(calculate_min(vec_res_value), 0); - break; - } - case ReduceOperation::MAX: - { - *(reinterpret_cast<T *>(output.ptr())) = wrapper::vgetlane(calculate_max(vec_res_value), 0); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - } - } -}; - -struct RedOpX_qasymm8 -{ - inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, - const TensorInfo &in_info, const ReduceOperation op) - { - ARM_COMPUTE_UNUSED(out_slice); - ARM_COMPUTE_UNUSED(in_info); - - uint8x16_t vec_res_value = {0}; - - if (op == ReduceOperation::MIN || op == ReduceOperation::MAX) - { - vec_res_value = wrapper::vdup_n(*input.ptr(), wrapper::traits::vector_128_tag{}); - } - - execute_window_loop(in_slice, - [&](const Coordinates &) { - const auto vec_elements = 
wrapper::vloadq(input.ptr()); - switch (op) - { - case ReduceOperation::MIN: - { - vec_res_value = wrapper::vmin(vec_elements, vec_res_value); - break; - } - case ReduceOperation::MAX: - { - vec_res_value = wrapper::vmax(vec_elements, vec_res_value); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - } - }, - input); - - switch (op) - { - case ReduceOperation::MIN: - { - *(output.ptr()) = static_cast<uint8_t>(wrapper::vgetlane(calculate_min(vec_res_value), 0)); - break; - } - case ReduceOperation::MAX: - { - *(output.ptr()) = static_cast<uint8_t>(wrapper::vgetlane(calculate_max(vec_res_value), 0)); - break; - } - default: - { - ARM_COMPUTE_ERROR("Not supported"); - } - } - } -}; - -template <typename T, int S> struct RedOpYZW -{ - /** NEON vector tag type. */ - using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; - using neon_vector = typename wrapper::traits::neon_vector<T, S>::type; - - inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, - const TensorInfo &in_info, int axis, const ReduceOperation op) - { - ARM_COMPUTE_UNUSED(out_slice); - - execute_window_loop( - in_slice, - [&](const Coordinates &) { - neon_vector vec_res_value = {0}; - switch (op) - { - case ReduceOperation::MIN: - case ReduceOperation::MAX: - { - vec_res_value = wrapper::vloadq(reinterpret_cast<T *>(input.ptr())); - break; - } - default: - { - vec_res_value = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}); - break; - } - } - - for (unsigned int dim = 0; dim < in_info.dimension(axis); ++dim) - { - T *in_ptr; - switch (axis) - { - case 1: - in_ptr = reinterpret_cast<T *>( - input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, dim))); - break; - case 2: - in_ptr = reinterpret_cast<T *>( - input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, dim))); - break; - case 3: - in_ptr = reinterpret_cast<T *>( - input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, 0, dim))); - break; - 
default: - ARM_COMPUTE_ERROR("Not supported"); - } - const auto vec_elements = wrapper::vloadq(in_ptr); - - switch (op) - { - case ReduceOperation::MIN: - { - vec_res_value = wrapper::vmin(vec_elements, vec_res_value); - break; - } - case ReduceOperation::MAX: - { - vec_res_value = wrapper::vmax(vec_elements, vec_res_value); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - } - } - wrapper::vstore(reinterpret_cast<T *>(output.ptr()), vec_res_value); - }, - input, output); - } -}; - -struct RedOpYZW_qasymm8 -{ - inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, - const TensorInfo &in_info, int axis, const ReduceOperation op) - { - ARM_COMPUTE_UNUSED(out_slice); - - execute_window_loop( - in_slice, - [&](const Coordinates &) { - auto vec_res_value = wrapper::vloadq(input.ptr()); - - for (unsigned int index_dim = 0; index_dim < in_info.dimension(axis); ++index_dim) - { - uint8_t *in_ptr; - switch (axis) - { - case 1: - in_ptr = input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, index_dim)); - break; - case 2: - in_ptr = - input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, index_dim)); - break; - case 3: - in_ptr = - input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, 0, index_dim)); - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - } - const auto vec_elements = wrapper::vloadq(in_ptr); - - switch (op) - { - case ReduceOperation::MIN: - { - vec_res_value = wrapper::vmin(vec_elements, vec_res_value); - break; - } - case ReduceOperation::MAX: - { - vec_res_value = wrapper::vmax(vec_elements, vec_res_value); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - } - } - wrapper::vstore(reinterpret_cast<uint8_t *>(output.ptr()), vec_res_value); - }, - input, output); - } -}; - -void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsigned int axis, - const ReduceOperation op) -{ - const bool is_complex = (input->info()->num_channels() == 
2); - if (is_complex) - { - ARM_COMPUTE_ERROR("Not supported"); - } - - switch (axis) - { - case 0: - switch (input->info()->data_type()) - { - case DataType::QASYMM8: - return Reducer<RedOpX_qasymm8>::reduceX(window, input, output, RedOpX_qasymm8(), op); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - return Reducer<RedOpX<float16_t, 8>>::reduceX(window, input, output, - RedOpX<float16_t, 8>(), op); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F32: - return Reducer<RedOpX<float, 4>>::reduceX(window, input, output, RedOpX<float, 4>(), op); - case DataType::S32: - return Reducer<RedOpX<int32_t, 4>>::reduceX(window, input, output, RedOpX<int32_t, 4>(), - op); - default: - ARM_COMPUTE_ERROR("Not supported"); - } - case 1: - switch (input->info()->data_type()) - { - case DataType::QASYMM8: - return Reducer<RedOpYZW_qasymm8>::reduceY(window, input, output, RedOpYZW_qasymm8(), op); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - return Reducer<RedOpYZW<float16_t, 8>>::reduceY(window, input, output, - RedOpYZW<float16_t, 8>(), op); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F32: - return Reducer<RedOpYZW<float, 4>>::reduceY(window, input, output, RedOpYZW<float, 4>(), - op); - case DataType::S32: - return Reducer<RedOpYZW<int32_t, 4>>::reduceY(window, input, output, - RedOpYZW<int32_t, 4>(), op); - default: - ARM_COMPUTE_ERROR("Not supported"); - } - case 2: - switch (input->info()->data_type()) - { - case DataType::QASYMM8: - return Reducer<RedOpYZW_qasymm8>::reduceZ(window, input, output, RedOpYZW_qasymm8(), op); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - return Reducer<RedOpYZW<float16_t, 8>>::reduceZ(window, input, output, - RedOpYZW<float16_t, 8>(), op); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F32: - return Reducer<RedOpYZW<float, 4>>::reduceZ(window, input, output, RedOpYZW<float, 4>(), - op); - case DataType::S32: - return 
Reducer<RedOpYZW<int32_t, 4>>::reduceZ(window, input, output, - RedOpYZW<int32_t, 4>(), op); - default: - ARM_COMPUTE_ERROR("Not supported"); - } - case 3: - switch (input->info()->data_type()) - { - case DataType::QASYMM8: - return Reducer<RedOpYZW_qasymm8>::reduceW(window, input, output, RedOpYZW_qasymm8(), op); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - return Reducer<RedOpYZW<float16_t, 8>>::reduceW(window, input, output, - RedOpYZW<float16_t, 8>(), op); -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F32: - return Reducer<RedOpYZW<float, 4>>::reduceW(window, input, output, RedOpYZW<float, 4>(), - op); - case DataType::S32: - return Reducer<RedOpYZW<int32_t, 4>>::reduceW(window, input, output, - RedOpYZW<int32_t, 4>(), op); - default: - ARM_COMPUTE_ERROR("Not supported"); - } - default: - ARM_COMPUTE_ERROR("Unsupported reduction axis"); - } -} - -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, - ReduceOperation op) -{ - ARM_COMPUTE_UNUSED(op); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - - if (input->num_channels() == 1) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, - DataType::F16, DataType::F32); - } - else - { - ARM_COMPUTE_RETURN_ERROR_MSG("Not support complex"); - } - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, - "Reduction axis greater than max number of dimensions"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); - - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != output->num_channels()); - - const TensorShape output_shape = - 
arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis); - const TensorInfo tensor_info_reshaped = input->clone()->set_tensor_shape(output_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_reshaped); - } - - return Status{}; -} - -std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, - unsigned int axis, ReduceOperation op) -{ - ARM_COMPUTE_UNUSED(op); - - // Calculate output shape and set if empty - const TensorShape output_shape = - arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis); - - // Output auto initialization if not yet initialized - DataType output_data_type = input->data_type(); - auto_init_if_empty(*output, input->clone() - ->set_tensor_shape(output_shape) - .set_data_type(output_data_type) - .reset_padding() - .set_is_resizable(true)); - - unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type()); - - // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); - - Status err = (window_changed) - ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - - return std::make_tuple(err, win); -} -} // namespace - -NEReductionOperationKernelEx::NEReductionOperationKernelEx() - : _input(nullptr), _output(nullptr), _reduction_axis(0), _op(ReduceOperation::MAX), - _border_size() -{ -} - -BorderSize NEReductionOperationKernelEx::border_size() const { return _border_size; } - -void NEReductionOperationKernelEx::configure(const ITensor *input, ITensor *output, - unsigned int axis, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op)); - - unsigned int num_elems_processed_per_iteration = - 16 / data_size_from_type(input->info()->data_type()); - - _input = input; - _output = output; - _border_size = - (axis == 0) - ? BorderSize(0, num_elems_processed_per_iteration - - (input->info()->dimension(0) % num_elems_processed_per_iteration), - 0, 0) - : BorderSize(); - _op = op; - _reduction_axis = axis; - - // Configure kernel window - auto win_config = validate_and_configure_window(_input->info(), _output->info(), axis, op); - - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - - INEKernel::configure(std::get<1>(win_config)); -} - -Status NEReductionOperationKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output, - unsigned int axis, ReduceOperation op) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op)); - ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>( - validate_and_configure_window(input->clone().get(), output->clone().get(), axis, op))); - - return Status{}; -} - -void NEReductionOperationKernelEx::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - reduce_op(window, _input, _output, _reduction_axis, _op); -} -} // namespace arm_compute diff 
--git a/compute/ARMComputeEx/src/core/UtilsEx.cpp b/compute/ARMComputeEx/src/core/UtilsEx.cpp deleted file mode 100644 index 863316909..000000000 --- a/compute/ARMComputeEx/src/core/UtilsEx.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/UtilsEx.h" -#include "arm_compute/core/Error.h" - -using namespace arm_compute; - -const std::pair<unsigned int, unsigned int> -arm_compute::transposeconv_output_dimensions(unsigned int in_width, unsigned int in_height, - unsigned int kernel_width, unsigned int kernel_height, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_bottom) -{ - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - const unsigned int padx = info.pad_left() + info.pad_right(); - const unsigned int pady = info.pad_top() + info.pad_bottom(); - - ARM_COMPUTE_ERROR_ON(in_width < 1 || in_height < 1); - ARM_COMPUTE_ERROR_ON(kernel_width <= padx); - ARM_COMPUTE_ERROR_ON(kernel_height <= pady); - - // Find the transpose conv out dimensions - // transpose conv out: - // tconv_out + pad = 1 + (in - 1) * stride + invalid - // tconv_out = 1 + (in - 1) * stride + invalid - pad - const int w = stride_x * (in_width - 1) + kernel_width - padx + invalid_right; - const int h = stride_y * (in_height - 1) + kernel_height - pady + invalid_bottom; - - return std::make_pair<unsigned int, unsigned int>(w, h); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/CLFunctionsEx.cpp b/compute/ARMComputeEx/src/runtime/CL/CLFunctionsEx.cpp deleted file mode 100644 index 158fe0b0c..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/CLFunctionsEx.cpp +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arm_compute/runtime/CL/CLFunctionsEx.h" - -// NOTE This empty file aims to validate "CLFunctionsEx.h". -// DO NOT REMOVE this file. diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp deleted file mode 100644 index 267228eac..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/Utils.h" - -namespace arm_compute -{ -CLArgMinMaxLayerEx::CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(), - _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis() -{ -} - -Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, - const ReductionOperation &op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && - op != ReductionOperation::ARG_IDX_MIN, - "Invalid reduction operation"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast<int>(TensorShape::num_max_dimensions), - "Reduction axis greater than max number of dimensions"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); - const unsigned int num_of_stages = - calculate_number_of_stages_only_x_axis(input->dimension(0), axis); - - DataType output_data_type = DataType::S32; - TensorInfo not_reshaped_output; - const auto input_num_channles = input->num_channels(); - const auto input_qinfo = input->quantization_info(); - - if (output->total_size() != 0) - { - output_data_type = output->data_type(); - const TensorInfo expected_output_shape = output->clone()->set_tensor_shape( - arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, - false)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output); - } - - auto shape_before_reshape = input->tensor_shape(); - shape_before_reshape.set(axis, 1); - auto initialize_tensorinfo = [](TensorInfo &ti, TensorShape shape, 
DataType data_type, - int num_channels, QuantizationInfo qinfo) { - ti.set_data_type(data_type) - .set_tensor_shape(shape) - .set_num_channels(num_channels) - .set_quantization_info(qinfo); - }; - - initialize_tensorinfo(not_reshaped_output, shape_before_reshape, output_data_type, - input_num_channles, input_qinfo); - - if (num_of_stages == 1) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLArgMinMaxLayerKernelEx::validate(input, nullptr, &not_reshaped_output, axis, op)); - } - else - { - // Create temporary tensor infos - std::vector<TensorInfo> sums_vector(num_of_stages - 1); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - for (unsigned int i = 0; i < num_of_stages - 1; i++) - { - shape.set(0, ceil(shape.x() / 128.f)); - sums_vector[i].set_data_type(input->data_type()); - sums_vector[i].set_tensor_shape(shape); - sums_vector[i].set_num_channels(input->num_channels()); - } - - // Validate ReductionOperation only on first kernel - ARM_COMPUTE_RETURN_ON_ERROR( - CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op)); - - // Validate ReductionOperation on intermediate stages - for (unsigned int i = 1; i < num_of_stages - 1; ++i) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1], - &sums_vector[i], axis, op)); - } - - // Validate ReductionOperation on the last stage - const unsigned int last_stage = num_of_stages - 1; - ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate( - input, &sums_vector[last_stage - 1], &not_reshaped_output, axis, op)); - } - ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(&not_reshaped_output, output)); - return Status{}; -} - -void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor *output, - const ReductionOperation &op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); - _reduction_axis = axis; - - const
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape( - input->info()->tensor_shape(), axis, false); - DataType output_data_type = (output->info()->data_type() == DataType::UNKNOWN) - ? DataType::S32 - : output->info()->data_type(); - auto_init_if_empty(*output->info(), input->info() - ->clone() - ->set_tensor_shape(output_shape) - .set_data_type(output_data_type) - .reset_padding() - .set_is_resizable(true)); - - // Configure reduction operation kernels - _reduction_kernels_vector.resize(_num_of_stages); - - _memory_group.manage(&_not_reshaped_output); - // Create temporary tensors - if (_num_of_stages == 1) - { - // Force an early initialization for int64 output type - TensorShape output_shape{input->info()->tensor_shape()}; - output_shape.set(axis, 1); - auto_init_if_empty(*_not_reshaped_output.info(), input->info() - ->clone() - ->set_tensor_shape(output_shape) - .set_data_type(output_data_type) - .reset_padding() - .set_is_resizable(true)); - _not_reshaped_output.info()->set_tensor_shape(output_shape); - _reduction_kernels_vector[0].configure(input, nullptr, &_not_reshaped_output, axis, op); - } - else - { - _results_vector.resize(_num_of_stages - 1); - TensorShape shape{input->info()->tensor_shape()}; - for (unsigned int i = 0; i < _num_of_stages - 1; i++) - { - shape.set(0, ceil(shape.x() / 128.f)); - _results_vector[i].allocator()->init( - input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type)); - } - - // Apply ReductionOperation only on first kernel - _memory_group.manage(&_results_vector[0]); - _reduction_kernels_vector[0].configure(input, nullptr, &_results_vector[0], axis, op); - - // Apply ReductionOperation on intermediate stages - for (unsigned int i = 1; i < _num_of_stages - 1; ++i) - { - _memory_group.manage(&_results_vector[i]); - _reduction_kernels_vector[i].configure(input, &_results_vector[i - 1], &_results_vector[i], - axis, op); - _results_vector[i - 1].allocator()->allocate(); - } - 
- // Apply ReductionOperation on the last stage - const unsigned int last_stage = _num_of_stages - 1; - _reduction_kernels_vector[last_stage].configure(input, &_results_vector[last_stage - 1], - &_not_reshaped_output, axis, op); - _results_vector[last_stage - 1].allocator()->allocate(); - } - _reshape_kernel.configure(&_not_reshaped_output, output); - _not_reshaped_output.allocator()->allocate(); -} - -void CLArgMinMaxLayerEx::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - for (unsigned int i = 0; i < _num_of_stages; ++i) - { - CLScheduler::get().enqueue(_reduction_kernels_vector[i], false); - } - CLScheduler::get().enqueue(_reshape_kernel, false); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp deleted file mode 100644 index e5122ab8f..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h" - -#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - BinaryLogicalOperation op) -{ - auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); - k->configure(input1, input2, output, op); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? 
input1 : input2; - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp deleted file mode 100644 index c7d0ac8e2..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLCastBool.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLCastBool.h" - -#include "arm_compute/core/CL/kernels/CLCastBoolKernel.h" - -using namespace arm_compute; - -void CLCastBool::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLCastBoolKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp deleted file mode 100644 index 3dede0562..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/UtilsEx.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include <memory> -#include <tuple> - -namespace arm_compute -{ -using namespace arm_compute::misc::shape_calculator; - -CLDirectTransposeConvLayer::CLDirectTransposeConvLayer( - std::shared_ptr<IMemoryManager> memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _scale_f(), - _conv_f(), - _flip_weights(), - _scaled_output(), - _original_weights(nullptr), - _weights_flipped(), - _flip_axis(), - _is_prepared(false) -{ -} - -Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, ITensorInfo *output, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_bottom, - const WeightsInfo &weights_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); - const DataLayout data_layout = input->data_layout(); - - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); - - auto out_dims = transposeconv_output_dimensions( - input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), - weights->dimension(idx_h), info, invalid_right, 
invalid_bottom); - - const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); - - if (bias != nullptr) - { - if (is_data_type_quantized_asymmetric(input->data_type())) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); - } - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias); - } - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w], - "Output's width is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h], - "Output's height is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], - "Output's depth is invalid."); - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, - pad_bottom); - TensorInfo scale_out_info(input->clone() - ->set_is_resizable(true) - .reset_padding() - .set_tensor_shape(scale_out_shape) - .set_data_layout(data_layout)); - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info)); - ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, - conv_info, weights_info)); - - return Status{}; -} - -void CLDirectTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, - const ICLTensor *bias, ICLTensor *output, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_bottom, - const WeightsInfo &weights_info) -{ - configure(CLKernelLibrary::get().get_compile_context(), 
input, weights, bias, output, info, - invalid_right, invalid_bottom, weights_info); -} - -void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_context, - ICLTensor *input, ICLTensor *weights, - const ICLTensor *bias, ICLTensor *output, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_bottom, - const WeightsInfo &weights_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - - const DataLayout data_layout = input->info()->data_layout(); - - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - _original_weights = weights; - _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); - _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis); - - auto out_dims = transposeconv_output_dimensions( - input->info()->dimension(idx_w), input->info()->dimension(idx_h), - weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right, - invalid_bottom); - - const TensorShape output_shape = - compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); - - // Output auto initialization if not yet initialized - auto_init_if_empty( - *output->info(), - input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate( - input->info(), weights->info(), bias == nullptr ? 
nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); - - _is_prepared = weights_info.retain_internal_weights(); - - _memory_group.manage(&_scaled_output); - - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order - // to match output shape - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - scale_out_info.set_data_layout(data_layout); - _scaled_output.allocator()->init(scale_out_info); - - // configure scale function - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::FLOOR); - _scale_f.configure(input, &_scaled_output, upsample_info); - - // Setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - _conv_f.configure(compile_context, &_scaled_output, &_weights_flipped, bias, output, conv_info, - weights_info); - _scaled_output.allocator()->allocate(); - - // Setup flip axis data - _flip_axis.allocator()->allocate(); - _flip_axis.map(true); - auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer()); - if (weights->info()->data_layout() == DataLayout::NHWC) - { - axis_data[0] = 1; - axis_data[1] = 2; - } - else - { - axis_data[0] = 0; - axis_data[1] = 1; - } - _flip_axis.unmap(); -} - -void CLDirectTransposeConvLayer::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - _scale_f.run(); - _conv_f.run(); -} - -void CLDirectTransposeConvLayer::prepare() -{ - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - // Run weights flipping and mark original weights tensor as unused - _weights_flipped.allocator()->allocate(); - 
_flip_weights.run(); - _original_weights->mark_as_unused(); - - // Prepare convolution - _conv_f.prepare(); - - // Free flipped weights - if (!_weights_flipped.is_used()) - { - _weights_flipped.allocator()->free(); - } - - _is_prepared = true; - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp deleted file mode 100644 index ae9d8afc6..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h" - -#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" - -using namespace arm_compute; - -void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output, - const ICLTensor *lookups) -{ - auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>(); - k->configure(input, output, lookups); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp deleted file mode 100644 index 01989461e..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h" - -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/MemorySupport.h" - -#include <algorithm> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -namespace -{ -Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) -{ - ARM_COMPUTE_UNUSED(input); - ARM_COMPUTE_UNUSED(weights); - ARM_COMPUTE_UNUSED(output); - ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); - - return Status{}; -} -} // namespace - -void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output) -{ - auto k = support::cpp14::make_unique<CLTransposeKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} - -Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *input, - const ITensorInfo *output) -{ - return CLTransposeKernel::validate(input, output); -} - -CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer( - std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(), - _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(), - _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(), - _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false), - _original_weights(nullptr) -{ -} -void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, - ICLTensor *output, bool retain_internal_weights) -{ - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); - - ARM_COMPUTE_UNUSED(output); - 
ARM_COMPUTE_UNUSED(retain_internal_weights); - // Configure gemmlowp function - _mm_gemmlowp.configure(input, weights, nullptr, output); -} - -void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *biases, ICLTensor *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - // Perform validate step - ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); - - _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - _accumulate_biases = false; - _is_prepared = fc_info.retain_internal_weights; - _original_weights = weights; - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr) - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - - _accumulate_biases = true; - - // Configure accumulate biases kernel - _accumulate_biases_kernel.set_target(CLScheduler::get().target()); - _accumulate_biases_kernel.configure(output, biases); - } - - const ICLTensor *weights_to_use = weights; - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->info()->dimension(1) > 1; - bool is_fc_after_conv = false; - if (is_batched_fc_layer) - { - is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); - } - else - { - is_fc_after_conv = 
input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1; - } - ARM_COMPUTE_ERROR_ON_MSG(is_fc_after_conv, - "CLFullyConnectedHybridLayer does not support after conv"); - ARM_COMPUTE_UNUSED(is_fc_after_conv); - - // Reshape weights if needed - if (!_are_weights_reshaped) - { - // Reshape the weights - _reshape_weights_output.allocator()->init( - weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights->info()))); - _reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output); - weights_to_use = &_reshape_weights_output; - } - - // Extract scale factor - _scale_factor.allocator()->init( - TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type())); - _memory_group.manage(&_scale_factor); - _scale_factor_kernel.configure(input, &_scale_factor); - - // Quantize input - _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); - _memory_group.manage(&_quantized_input); - _quant_input_kernel.configure(input, &_scale_factor, &_quantized_input); - - // GEMMLowp - _gemmlowp_output.allocator()->init( - output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); - _memory_group.manage(&_gemmlowp_output); - configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output, - fc_info.retain_internal_weights); - _quantized_input.allocator()->allocate(); - - // Multiply scale - _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output, - weights->info()->quantization_info().uniform().scale); - _gemmlowp_output.allocator()->allocate(); - _scale_factor.allocator()->allocate(); - - _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; -} - -Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - 
FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); - - bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - bool is_fc_after_conv = true; - const GPUTarget gpu_target = CLScheduler::get().target(); - - const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target)); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensorInfo *weights_to_use = weights; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->dimension(1) > 1; - if (is_batched_fc_layer) - { - is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(), - output->tensor_shape().cbegin() + 1)); - } - else - { - is_fc_after_conv = input->num_dimensions() > 1 && input->dimension(1) > 1; - } - ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_fc_after_conv, - "CLFullyConnectedHybridLayer does not support after conv"); - 
- if (!weights_reshaped) - { - // Validate reshape weights kernel - ARM_COMPUTE_RETURN_ON_ERROR( - CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights)); - weights_to_use = &reshaped_weights; - } - - // Validate Scale factor kernel - const ITensorInfo &scale_factor = - TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type()); - ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor)); - - // Validate quantization symm8 kernel - const ITensorInfo &quantized_input = - TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); - ARM_COMPUTE_RETURN_ON_ERROR( - CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input)); - - // Fully Connected layer after a Fully Connected Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); - - // Validate matrix multiply kernel - const ITensorInfo &gemmlowp_output = TensorInfo( - output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output)); - - // Multiply scale - ARM_COMPUTE_RETURN_ON_ERROR( - CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output)); - - return Status{}; -} - -void CLFullyConnectedHybridLayer::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Extract scale_factor - CLScheduler::get().enqueue(_scale_factor_kernel); - - // Quantize input - CLScheduler::get().enqueue(_quant_input_kernel); - - // Run matrix multiply - _mm_gemmlowp.run(); - - // Multiply scale factor - CLScheduler::get().enqueue(_multiply_scale_kernel); - - // Accumulate biases if provided - if (_accumulate_biases) - { - CLScheduler::get().enqueue(_accumulate_biases_kernel); - } -} - -void CLFullyConnectedHybridLayer::prepare() -{ - if (!_is_prepared) - { - 
ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - auto release_unused = [](CLTensor *w) { - if (!w->is_used()) - { - CLScheduler::get().queue().finish(); - w->allocator()->free(); - } - }; - - // Reshape of the weights if needed (happens only once) - if (!_are_weights_reshaped) - { - // Run reshape weights kernel and mark weights as unused - _reshape_weights_output.allocator()->allocate(); - _reshape_weights_kernel.run(); - - _are_weights_reshaped = true; - // We can not release _original_weights because it can be used in other nodes - } - - // Prepare GEMM prepare and release unused weights - _mm_gemmlowp.prepare(); - - // Release reshaped weights if unused - release_unused(&_reshape_weights_output); - - _is_prepared = true; - } -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp deleted file mode 100644 index 2ff4b9659..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp +++ /dev/null @@ -1,583 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h" - -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/Cast.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/MemorySupport.h" - -#include <algorithm> - -namespace arm_compute -{ -using namespace arm_compute::misc::shape_calculator; -using namespace arm_compute::utils::cast; - -namespace -{ -Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights, - const ITensorInfo &output, - GEMMLowpOutputStageInfo &gemmlowp_output_stage) -{ - gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - gemmlowp_output_stage.gemmlowp_offset = 0; - gemmlowp_output_stage.gemmlowp_multiplier = 0; - gemmlowp_output_stage.gemmlowp_shift = 0; - - // Configure output stage for quantized case - if (is_data_type_quantized_asymmetric(input.data_type())) - { - const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); - const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output.quantization_info().uniform(); - - const auto output_quant_info = (output.total_size() == 0) ? 
iq_info : oq_info; - - const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; - int output_multiplier = 0; - int output_shift = 0; - ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one( - multiplier, &output_multiplier, &output_shift)); - - // Set the GEMMLowp output stage info - gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; - gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; - gemmlowp_output_stage.gemmlowp_shift = output_shift; - gemmlowp_output_stage.gemmlowp_min_bound = 0; - gemmlowp_output_stage.gemmlowp_max_bound = 255; - gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier); - gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift); - } - - return Status{}; -} - -Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, - const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info) -{ - GEMMLowpOutputStageInfo gemmlowp_output_stage; - ARM_COMPUTE_RETURN_ON_ERROR( - construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage)); - - const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped - false, // is_b_reshaped - true, // reshape_b_only_on_first_run - 0, // depth_output_gemm3d - false, // reinterpret_input_as_3d - fc_info.retain_internal_weights, // retain_internal_weights - gemmlowp_output_stage, // gemmlowp_output_stage - fc_info.fp_mixed_precision, // fp_mixed_precision - true, // broadcast_bias - ActivationLayerInfo()); // activation_info - - if (is_data_type_quantized_asymmetric(input.data_type())) - { - const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); - const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); - - // Since we need negative offsets for computing convolution, we need to change - // QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo 
input_quantization_info(iq_info.scale, -iq_info.offset); - const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset); - - // Validate gemmlowp function - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate( - &input.clone()->set_quantization_info(input_quantization_info), - &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output, - gemm_info)); - } - else - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info)); - } - - return Status{}; -} -} // namespace - -void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output) -{ - auto k = support::cpp14::make_unique<CLTransposeKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} - -Status CLFullyConnectedLayerReshapeWeightsEx::validate(const ITensorInfo *input, - const ITensorInfo *output) -{ - return CLTransposeKernel::validate(input, output); -} - -CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager, - IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), - _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), - _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager), - _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), - _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true), - _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr) -{ -} -void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *bias, ICLTensor *output, - const FullyConnectedLayerInfo &fc_info) -{ - GEMMLowpOutputStageInfo gemmlowp_output_stage; - construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), - gemmlowp_output_stage); - - const GEMMInfo 
&gemm_info = GEMMInfo(false, // is_a_reshaped - false, // is_b_reshaped - true, // reshape_b_only_on_first_run - 0, // depth_output_gemm3d - false, // reinterpret_input_as_3d - fc_info.retain_internal_weights, // retain_internal_weights - gemmlowp_output_stage, // gemmlowp_output_stage - fc_info.fp_mixed_precision, // fp_mixed_precision - true, // broadcast_bias - ActivationLayerInfo()); // activation_info - - if (_is_quantized) - { - // Since we need negative offsets for computing convolution, we need to change - // QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = input->info()->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - - input->info()->set_quantization_info(QuantizationInfo( - input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); - weights->info()->set_quantization_info(QuantizationInfo( - weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); - - // Configure gemmlowp function - _mm_gemmlowp.configure(input, weights, bias, output, gemm_info); - - // Revert back QuantizatioInfo as input and weights could be used in other fully connected - // layers - input->info()->set_quantization_info(input_quantization_info); - weights->info()->set_quantization_info(weights_quantization_info); - } - else - { - // Configure matrix multiply kernel - _mm_gemm.configure(input, weights, bias, output, 1.f, 1.f, gemm_info); - } -} - -void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *bias, ICLTensor *output, - const FullyConnectedLayerInfo &fc_info) -{ - ARM_COMPUTE_ERROR_ON( - (weights->info()->dimension(1) != - (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); - - // If the fully connected layer is called after a convolution layer, the input tensor must be - // 
linearized - - // Initialize output tensor for flatten - TensorShape shape_flatten = compute_flatten_shape(input->info()); - _flatten_output.allocator()->init(input->info() - ->clone() - ->set_is_resizable(true) - .reset_padding() - .set_tensor_shape(shape_flatten) - .set_data_layout(DataLayout::NCHW)); - - // Configure flatten kernel - _memory_group.manage(&_flatten_output); - _flatten_layer.configure(input, &_flatten_output); - - // Configure matrix multiply kernel - configure_mm(&_flatten_output, weights, bias, output, fc_info); - - // Allocate the output tensor for flatten once all the configure methods have been called - _flatten_output.allocator()->allocate(); -} - -void CLFullyConnectedLayerEx::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *bias, ICLTensor *output, - const FullyConnectedLayerInfo &fc_info) -{ - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); - - // Configure matrix multiply kernel - configure_mm(input, weights, bias, output, fc_info); -} - -void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *biases, ICLTensor *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - // Perform validate step - ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); - - _are_weights_converted = true; - _are_weights_reshaped = fc_info.transpose_weights ? 
fc_info.are_weights_reshaped : true; - _is_fc_after_conv = true; - _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); - _is_prepared = fc_info.retain_internal_weights; - _original_weights = weights; - - if (_weights_manager) - { - _weights_manager->manage(weights); - } - - const ICLTensor *weights_to_use = weights; - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->info()->dimension(1) > 1; - if (is_batched_fc_layer) - { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); - } - else - { - _is_fc_after_conv = input->info()->num_dimensions() > 1; - } - - // Reshape weights if needed - if (!_are_weights_reshaped) - { - if (_weights_manager && _weights_manager->are_weights_managed(weights)) - { - _reshape_weights_managed_function.configure(weights); - weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>( - _weights_manager->acquire(weights, &_reshape_weights_managed_function)); - } - else - { - // Reshape the weights - _reshape_weights_function.configure(weights, &_reshape_weights_output); - weights_to_use = &_reshape_weights_output; - } - } - - // Convert weights if needed - if (_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout)) - { - if (_weights_manager && _weights_manager->are_weights_managed(weights_to_use)) - { - _convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(), - fc_info.weights_trained_layout); - weights_to_use = 
utils::cast::polymorphic_downcast<ICLTensor *>( - _weights_manager->acquire(weights, &_convert_weights_managed)); - } - else - { - // Convert weights - _convert_weights.configure(weights_to_use, &_converted_weights_output, - input->info()->tensor_shape(), fc_info.weights_trained_layout); - - weights_to_use = &_converted_weights_output; - } - _are_weights_converted = false; - } - - if (_is_fc_after_conv) - { - // Fully Connected layer after a Convolution Layer without batches - configure_conv_fc(input, weights_to_use, biases, output, fc_info); - } - else - { - // Fully Connected layer after a Fully Connected Layer without batches - configure_fc_fc(input, weights_to_use, biases, output, fc_info); - } -} - -Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); - - bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - bool is_fc_after_conv = true; - - const ITensorInfo &flatten_input = TensorInfo(input->clone() - ->set_is_resizable(true) - .reset_padding() - .set_tensor_shape(compute_flatten_shape(input)) - .set_data_layout(DataLayout::NCHW)); - const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); - const ITensorInfo &converted_weights = - weights_reshaped ? 
TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) - : TensorInfo(*reshaped_weights.clone()); - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensorInfo *input_to_use = input; - const ITensorInfo *weights_to_use = weights; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->dimension(1) > 1; - if (is_batched_fc_layer) - { - is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(), - output->tensor_shape().cbegin() + 1)); - } - else - { - is_fc_after_conv = input->num_dimensions() > 1; - } - - if (!weights_reshaped) - { - // Validate reshape weights kernel - ARM_COMPUTE_RETURN_ON_ERROR( - CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights)); - weights_to_use = &reshaped_weights; - } - - if (is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout)) - { - // Validate convert weights kernel - ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate( - weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout)); - weights_to_use = &converted_weights; - } - - if (is_fc_after_conv) - { - // Fully Connected layer after a Convolution Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON( - (weights_to_use->dimension(1) != - (input->dimension(0) * input->dimension(1) * input->dimension(2)))); - - // Validate flatten kernel - ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input)); - input_to_use = &flatten_input; - } - else - { - // Fully Connected layer after a Fully Connected Layer without batches - 
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); - } - - // Validate matrix multiply kernel - ARM_COMPUTE_RETURN_ON_ERROR( - validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info)); - - return Status{}; -} - -void CLFullyConnectedLayerEx::run() -{ - if (!_is_prepared) - { - if (!_are_weights_reshaped) - _reshape_weights_output.allocator()->allocate(); - if (!_are_weights_converted) - _converted_weights_output.allocator()->allocate(); - _is_prepared = true; - } - - { - if (!_weights_manager) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - } - - // Pointer to current weights - const ICLTensor *cur_weights = _original_weights; - // Reshape of the weights - if (!_are_weights_reshaped) - { - if (_weights_manager && _weights_manager->are_weights_managed(cur_weights)) - { - _original_weights = utils::cast::polymorphic_downcast<ICLTensor *>( - _weights_manager->run(cur_weights, &_reshape_weights_managed_function)); - } - else - { - _reshape_weights_function.run(); - cur_weights = &_reshape_weights_output; - } - } - - // Convert weights if needed - if (!_are_weights_converted) - { - if (_weights_manager && _weights_manager->are_weights_managed(cur_weights)) - { - _weights_manager->run(cur_weights, &_convert_weights_managed); - } - else - { - _convert_weights.run(); - } - } - - // Prepare GEMM prepare - if (!_is_quantized) - { - _mm_gemm.prepare(); - } - } - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Linearize input if it comes from a convolutional layer - if (_is_fc_after_conv) - { - _flatten_layer.run(); - } - - // Run matrix multiply - if (_is_quantized) - { - _mm_gemmlowp.run(); - } - else - { - _mm_gemm.run(); - } -} - -void CLFullyConnectedLayerEx::prepare() -{ -#if 0 // TODO Remove this block - if(!_is_prepared) - { - if(!_weights_manager) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - } - - auto release_unused = [](CLTensor * w) - { - if(!w->is_used()) - { - 
CLScheduler::get().queue().finish(); - w->allocator()->free(); - } - }; - - // Pointer to current weights - const ICLTensor *cur_weights = _original_weights; - - // Reshape of the weights if needed (happens only once) - if(!_are_weights_reshaped) - { - if(_weights_manager && _weights_manager->are_weights_managed(_original_weights)) - { - cur_weights = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->run(cur_weights, &_reshape_weights_managed_function)); - } - else - { - // Run reshape weights kernel and mark weights as unused - _reshape_weights_output.allocator()->allocate(); - _reshape_weights_function.run(); - - cur_weights->mark_as_unused(); - cur_weights = &_reshape_weights_output; - } - _are_weights_reshaped = true; - } - - // Convert weights if needed (happens only once) - if(!_are_weights_converted) - { - if(_weights_manager && _weights_manager->are_weights_managed(cur_weights)) - { - _weights_manager->run(cur_weights, &_convert_weights_managed); - } - else - { - _converted_weights_output.allocator()->allocate(); - _convert_weights.run(); - cur_weights->mark_as_unused(); - } - - _are_weights_converted = true; - } - - // Release reshaped weights if unused - release_unused(&_reshape_weights_output); - - // Prepare GEMM prepare and release unused weights - if(!_is_quantized) - { - _mm_gemm.prepare(); - } - - // Release converted weights if unused - release_unused(&_reshape_weights_output); - release_unused(&_converted_weights_output); - - _is_prepared = true; - } -#endif -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp deleted file mode 100644 index 157b4d977..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h" - -#include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h> -#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h> -#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h> - -using namespace arm_compute; - -void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *input, - const arm_compute::ICLTensor *weights, - const arm_compute::ICLTensor *biases, - arm_compute::ICLTensor *output, bool needs_reshape, - const arm_compute::TensorShape &reshape, - KernelType kernel_type) -{ - _input = input; - _weights = weights; - _biases = biases; - _output = output; - _needs_reshape = needs_reshape; - - const ICLTensor *input_to_use = input; - if (_needs_reshape) - { - // reshape - auto_init_if_empty(*_cl_buffer.info(), - _input->info()->clone()->set_tensor_shape(reshape).set_data_layout( - _input->info()->data_layout())); - _cl_reshape.configure(_input, &_cl_buffer); - input_to_use = &_cl_buffer; - } - - _cl_fc = [&]() { - if (kernel_type == KernelType::GENERAL) - { - auto fc = new arm_compute::CLFullyConnectedLayerEx{_memory_manager}; - fc->configure(input_to_use, _weights, _biases, _output); - return std::unique_ptr<arm_compute::IFunction>(fc); - } - else if (kernel_type == KernelType::PREPROCESSED_WEIGHTS) - { - bool is_hybrid = 
(input->info()->data_type() == DataType::F32 || - input->info()->data_type() == DataType::F16) && - (weights->info()->data_type() == DataType::S8 || - weights->info()->data_type() == DataType::QASYMM8_SIGNED); - - if (is_hybrid) - { - auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager}; - ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info()); - const auto orgin_weights_data_type = weights_info->data_type(); - weights_info->set_data_type(DataType::QASYMM8_SIGNED); - fc->configure(input_to_use, _weights, _biases, _output); - weights_info->set_data_type(orgin_weights_data_type); - return std::unique_ptr<arm_compute::IFunction>(fc); - } - else - { - auto fc = new arm_compute::CLFullyConnectedLayer{_memory_manager}; - fc->configure(input_to_use, _weights, _biases, _output); - return std::unique_ptr<arm_compute::IFunction>(fc); - } - } - else - { - throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type"); - } - - }(); - - if (_needs_reshape) - { - // NOTE _cl_buffer is inaccessible from outside, and thus it is safe to invoke allocate here. - _cl_buffer.allocator()->allocate(); - } -} - -void CLFullyConnectedReshapingLayer::run(void) -{ - if (_needs_reshape) - _cl_reshape.run(); - - _cl_fc->run(); -} - -void CLFullyConnectedReshapingLayer::prepare(void) { _cl_fc->prepare(); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp deleted file mode 100644 index e0b833b04..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/functions/CLGatherEx.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLGatherExKernel.h" - -using namespace arm_compute; - -void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, - int axis) -{ - auto k = support::cpp14::make_unique<CLGatherExKernel>(); - k->configure(input, indices, output, axis); - _kernel = std::move(k); -} - -Status CLGatherEx::validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis) -{ - return CLGatherExKernel::validate(input, indices, output, axis); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp deleted file mode 100644 index 65b89a389..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h" - -#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h" - -using namespace arm_compute; - -void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys, - const ICLTensor *input, ICLTensor *output, ICLTensor *hits) -{ - auto k = support::cpp14::make_unique<CLHashtableLookupKernel>(); - k->configure(lookups, keys, input, output, hits); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp deleted file mode 100644 index 5a7e40839..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h" - -#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -CLInstanceNormalizationLayerEx::CLInstanceNormalizationLayerEx() {} - -void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output, - ICLTensor *gamma, ICLTensor *beta, float epsilon) -{ - auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>(); - k->configure(input, output, gamma, beta, epsilon); - _kernel = std::move(k); -} - -Status CLInstanceNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma, const ITensorInfo *beta, - float epsilon) -{ - return CLInstanceNormalizationLayerKernelEx::validate(input, output, gamma, beta, epsilon); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp deleted file mode 100644 index 28e5bc0da..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLNeg.h" - -#include "arm_compute/core/CL/kernels/CLNegKernel.h" - -using namespace arm_compute; - -void CLNeg::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp deleted file mode 100644 index aa9f32ec6..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLOneHot.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/CL/functions/CLOneHot.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLOneHotKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/MemorySupport.h" -namespace arm_compute -{ -CLOneHot::CLOneHot() : _memset_kernel(), _onehot_kernel(), _has_to_memset(false) {} -void CLOneHot::configure(const ICLTensor *indices, const ICLTensor *on_value, - const ICLTensor *off_value, ICLTensor *output, int depth, int axis) -{ - _onehot_kernel.configure(indices, on_value, off_value, output, depth, axis); -} -void CLOneHot::configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output, - PixelValue off_value, int depth, int axis) -{ - _has_to_memset = true; - _memset_kernel.configure(output, off_value); - _onehot_kernel.configure(indices, on_value, output, depth, axis); -} -Status CLOneHot::validate(const ITensorInfo *indices, const ITensorInfo *on_value, - const ITensorInfo *off_value, const ITensorInfo *output, int depth, - int axis) -{ - return CLOneHotKernel::validate(indices, on_value, off_value, output, depth, axis); -} -void CLOneHot::run() -{ - if (_has_to_memset) - { - CLScheduler::get().enqueue(_memset_kernel, true); - } - - CLScheduler::get().enqueue(_onehot_kernel, false); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp deleted file mode 100644 index b198e7330..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/functions/CLReduceOperation.h" - -#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -using namespace arm_compute; - -CLReduceOperation::CLReduceOperation(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(), - _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape() -{ -} - -Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output, - const std::set<uint32_t> &axis, bool keep_dims, - const ReduceOperation &op) -{ - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0); - - ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1); - - // Create temporary tensor infos - auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - auto it = axis.begin(); - for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) - { - shape.set(*it, 1, false); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - interm_tensors[i].set_data_layout(input->data_layout()); - interm_tensors[i].set_quantization_info(input->quantization_info()); - } - - // Set a vector that is ordered ITensorInfo sequentially. 
- std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; ++i) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate ReduceOperation only on all kernels - it = axis.begin(); - for (size_t i = 0; i < num_of_kernels; ++i, ++it) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op)); - } - - if (!keep_dims) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output)); - } - - return Status{}; -} - -void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output, - const std::set<uint32_t> &axis, bool keep_dims, - ReduceOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, keep_dims, op)); - - _axis = axis; - - _input = input; - _output = output; - _keep_dims = keep_dims; - - // NOTE The axis must have no duplication. - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0); - - if (num_of_kernels < 1) - { - throw std::runtime_error("CLReduceOperation: there is no axis to reduce"); - } - - _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); - - // Set a vector that is ordered ICLTensors sequentially. 
- std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; ++i) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ReduceOperation on all kernels - TensorShape shape{input->info()->tensor_shape()}; - auto it = axis.begin(); - for (size_t i = 0; i < num_of_kernels; ++i, ++it) - { - shape.set(*it, 1, false); - if (!keep_dims || i != (num_of_kernels - 1)) - { - _interm_tensors[i].allocator()->init(input->info()->clone()->set_tensor_shape(shape)); - _memory_group.manage(&_interm_tensors[i]); - } - _reduce_kernels[i].configure(tensors[i], tensors[i + 1], *it, op); - if (i != 0) - { - _interm_tensors[i - 1].allocator()->allocate(); - } - } - - // Configure reshape layer if we want to drop the dimensions - if (!keep_dims) - { - _reshape.configure(&_interm_tensors[num_of_interm_tensors - 1], output); - _interm_tensors[num_of_interm_tensors - 1].allocator()->allocate(); - } -} - -void CLReduceOperation::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - const size_t num_of_kernels = _axis.size(); - for (size_t i = 0; i < num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_reduce_kernels[i]); - } - - if (!_keep_dims) - { - _reshape.run(); - } -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp deleted file mode 100644 index a502f032e..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/CL/functions/CLSplitVEx.h" -#include "support/ToolchainSupport.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include <cassert> - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ICLTensor *size_splits, const std::vector<ICLTensor *> &outputs, - unsigned int num_splits) -{ - ARM_COMPUTE_RETURN_ERROR_ON_MSG(size_splits->info()->num_dimensions() != 1, - "size_splits must be a 1-D tensor."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_splits != outputs.size(), - "Number of output tensors does not match number of splits."); - return Status{}; -} - -Status validate_slices(const ITensorInfo *input, const std::vector<ITensorInfo *> &outputs, - uint32_t split_dim) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ERROR_ON(split_dim >= input->num_dimensions()); - ARM_COMPUTE_RETURN_ERROR_ON(outputs.size() < 2); - - // Start/End coordinates - Coordinates start_coords; - Coordinates end_coords; - for (unsigned int d = 0; d < input->num_dimensions(); ++d) - { - end_coords.set(d, -1); - } - unsigned int axis_offset = 0; - // Validate output tensors - for (const auto &output : outputs) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - // Get output shape - const TensorShape output_shape = output->tensor_shape(); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() == 0); - - const size_t axis_split_step = output_shape[split_dim]; - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - auto_init_if_empty(tmp_output_info, - input->clone()->set_is_resizable(true).set_tensor_shape(output_shape)); - - // Update coordinate on axis - 
start_coords.set(split_dim, axis_offset); - end_coords.set(split_dim, axis_offset + axis_split_step); - - ARM_COMPUTE_RETURN_ON_ERROR(CLSlice::validate(input, output, start_coords, end_coords)); - - axis_offset += axis_split_step; - } - - return Status{}; -} - -void configure_slices(const ICLTensor *input, const std::vector<ICLTensor *> &outputs, - std::vector<CLSlice> &_slice_functions, uint32_t split_dim) -{ - unsigned int axis_offset = 0; - // Start/End coordinates - Coordinates start_coords; - Coordinates end_coords; - for (unsigned int d = 0; d < input->info()->num_dimensions(); ++d) - { - end_coords.set(d, -1); - } - int out_iter = 0; - for (const auto &output : outputs) - { - const TensorShape output_shape = output->info()->tensor_shape(); - auto op_size = output_shape.total_size(); - if (!op_size) - { - continue; - } - - assert(op_size != 0); - assert(split_dim <= output_shape.num_dimensions()); - - const size_t axis_split_step = output_shape[split_dim]; - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->info()->clone(); - auto_init_if_empty( - tmp_output_info, - input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape)); - - // Update coordinate on axis - start_coords.set(split_dim, axis_offset); - end_coords.set(split_dim, axis_offset + axis_split_step); - - // Configure slice function - _slice_functions[out_iter].configure(input, output, start_coords, end_coords); - - // Set valid region from shape - outputs[out_iter++]->info()->set_valid_region(ValidRegion(Coordinates(), output_shape)); - axis_offset += axis_split_step; - } -} - -} // namespace - -CLSplitVEx::CLSplitVEx() - : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions() -{ -} - -void CLSplitVEx::configure(const ICLTensor *input, const ICLTensor *size_splits, uint32_t split_dim, - const std::vector<ICLTensor *> &outputs, unsigned int num_splits) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, 
size_splits); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(size_splits, outputs, num_splits)); - - _input = input; - _size_splits = size_splits; - _outputs = outputs; - _num_splits = num_splits; - - // Create tensor slices - _slice_functions.resize(_num_splits); - - // Extract output tensor info - std::vector<ITensorInfo *> outputs_info; - for (auto &output : _outputs) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - outputs_info.emplace_back(output->info()); - } - - // Validate slices - ARM_COMPUTE_ERROR_THROW_ON(validate_slices(_input->info(), outputs_info, split_dim)); - - // Configure slices - configure_slices(_input, _outputs, _slice_functions, split_dim); -} - -void CLSplitVEx::run() -{ - // execute the slices - for (unsigned i = 0; i < _outputs.size(); ++i) - { - _slice_functions[i].run(); - } -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp deleted file mode 100644 index 3ac95a8e6..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/CL/functions/CLTopKV2.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "arm_compute/core/CL/ICLTensor.h" - -#include "../../topk_v2.h" - -namespace arm_compute -{ - -CLTopKV2::CLTopKV2() - : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0), - _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(), - _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(), - _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), - _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(), - _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(), - _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(), - _reorder_negatives_kernel(), _store_kernel()*/ -{ -} - -void CLTopKV2::configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices, - int total_bits, int bits) -{ - _total_bits = total_bits; - _bits = bits; - _n = input->info()->tensor_shape()[0]; - - // _total_bits should be divided by _bits. 
- ARM_COMPUTE_ERROR_ON((_total_bits % _bits) != 0); - - _k = k; - _radix = 1 << bits; - - _input = input; - _values = values; - _indices = indices; - - std::string topk_env; - -// Disable GPU implementation -// TODO Enable GPU implementation with verification, or remove code -// Invalid result on GPU -#if 0 - char *env = getenv("ACL_TOPKV2"); - if (env) - topk_env = env; - - if (topk_env == "GPU_SINGLE") - { - _qs_idx_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - _qs_temp_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - - _qs_kernel.configure(input, values, indices, &_qs_idx_buf, &_qs_temp_buf, k, _n); - } - else if (topk_env == "GPU") - { - // n should be divided by (_GROUPS * _ITEMS) - ARM_COMPUTE_ERROR_ON((_n % (_GROUPS * _ITEMS)) != 0); - - _hist_buf_size = _radix * _GROUPS * _ITEMS; - _glob_sum_buf_size = _HISTOSPLIT; - - _hist_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _hist_buf_size); - _glob_sum_buf = - cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _glob_sum_buf_size); - _temp_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _glob_sum_buf_size); - _first_negative_idx_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int)); - _in_key_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n); - _out_key_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n); - _in_ind_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - _out_ind_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | 
CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - - _p_in_key_buf = &_in_key_buf; - _p_out_key_buf = &_out_key_buf; - _p_in_ind_buf = &_in_ind_buf; - _p_out_ind_buf = &_out_ind_buf; - - _init_kernel.configure(input, _p_in_key_buf, _p_in_ind_buf, _n); - _hist_kernel.configure(&_hist_buf, bits, _n); - _scan_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits); - _glob_scan_hist_kernel.configure(&_glob_sum_buf, &_temp_buf, bits); - _paste_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits); - _reorder_kernel.configure(&_hist_buf, bits, _n); - _find_first_negative_kernel.configure(&_first_negative_idx_buf, _n); - _reorder_negatives_kernel.configure(&_first_negative_idx_buf, _n); - _store_kernel.configure(values, indices, k, _n); - } - else -#endif // Disable GPU implementation - { - // DO NOTHING for CPU. - } -} - -void CLTopKV2::run() -{ - std::string topk_env; -#if 0 - char *env = getenv("ACL_TOPKV2"); - if (env) - topk_env = env; - - if (topk_env == "GPU_SINGLE") - { - run_on_gpu_single_quicksort(); - } - else if (topk_env == "GPU") - { - run_on_gpu(); - } - else -#endif - { - run_on_cpu(); - } -} - -#if 0 -void CLTopKV2::run_on_gpu_single_quicksort() -{ - // This is a single threaded quick sort implementation. - CLScheduler::get().enqueue(_qs_kernel, false); - - arm_compute::CLScheduler::get().sync(); -} - -void CLTopKV2::run_on_gpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - // 1. CLTopKV2Init set key buffer and index buffer. - // - Key buffer is set as the same value of the layer's input - // - Values in the index buffer are set as their indices. - CLScheduler::get().enqueue(_init_kernel, false); - - int n_passes = _total_bits / _bits; - - // 2. Repeat (total_bits/bits) times. - // - total_bits is the number of bits of the data type (e.g., 32 for float) - // - bits defines number of buckets (e.g. 16 buckets where bit is 4) - for (int pass = 0; pass < n_passes; ++pass) - { - arm_compute::CLScheduler::get().sync(); - - // 2.1. 
Calculate histogram with _GROUPS * _ITEMS threads - _hist_kernel.setPass(pass, _p_in_key_buf); - CLScheduler::get().enqueue(_hist_kernel, false); - - // 2.2. Calculate prefix sum locally with multiple threads - CLScheduler::get().enqueue(_scan_hist_kernel, false); - // 2.3. Calculate prefix sum within a work group - CLScheduler::get().enqueue(_glob_scan_hist_kernel, false); - // 2.4. Calculate global prefix sum - CLScheduler::get().enqueue(_paste_hist_kernel, false); - - // 2.5. Reorder keys and indices based on the global prefix sum - _reorder_kernel.setPass(pass, _p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, _p_out_ind_buf); - CLScheduler::get().enqueue(_reorder_kernel, false); - - cl::Buffer *tmp; - // swap key buffers - tmp = _p_in_key_buf; - _p_in_key_buf = _p_out_key_buf; - _p_out_key_buf = tmp; - - // swap index buffers - tmp = _p_in_ind_buf; - _p_in_ind_buf = _p_out_ind_buf; - _p_out_ind_buf = tmp; - } - - // 3. Get the first negative index - // Because we swap in_buf and out_buf at the end of the above for loop, - // the output buffers are in bufs. - _find_first_negative_kernel.setOutputBuffer(_p_in_key_buf); - CLScheduler::get().enqueue(_find_first_negative_kernel, false); - - // 4. Correct odering of negatives - // - Since radix sort does not consider negatives, negatives are considered as bigger values - // than positives. - // reordered data will be stored in _p_out_key_buf and _p_out_ind_buf - _reorder_negatives_kernel.setBuffers(_p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, - _p_out_ind_buf); - CLScheduler::get().enqueue(_reorder_negatives_kernel, false); - - // 5. Extract top k values from sorted keys and indices. - _store_kernel.setOutputBuffers(_p_out_key_buf, _p_out_ind_buf); - CLScheduler::get().enqueue(_store_kernel, false); - - arm_compute::CLScheduler::get().sync(); - -#if 0 - // below code is left for debugging. 
- int first_neg; - q.enqueueReadBuffer(_first_negative_idx_buf, CL_TRUE, 0, sizeof(cl_int), &first_neg); - std::cout << "first neg = " << first_neg << std::endl; - - float in_key[_n]; - q.enqueueReadBuffer(*_p_in_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, in_key); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "in_key[" << i << "] = " << in_key[i] << std::endl; - } - - float out_key[_n]; - q.enqueueReadBuffer(*_p_out_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, out_key); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "out_key[" << i << "] = " << out_key[i] << std::endl; - } - - int in_ind[_n]; - q.enqueueReadBuffer(*_p_in_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, in_ind); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "in_ind[" << i << "] = " << in_ind[i] << std::endl; - } - - int out_ind[_n]; - q.enqueueReadBuffer(*_p_out_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, out_ind); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "out_ind[" << i << "] = " << out_ind[i] << std::endl; - } - - int hist_buf[_hist_buf_size]; - q.enqueueReadBuffer(_hist_buf, CL_TRUE, 0, sizeof(cl_int)*_hist_buf_size, hist_buf); - for(uint32_t i = 0 ; i < _hist_buf_size; ++i) { - std::cout << "hist_buf[" << i << "] = " << hist_buf[i] << std::endl; - } - - int glob_sum_buf[_glob_sum_buf_size]; - q.enqueueReadBuffer(_glob_sum_buf, CL_TRUE, 0, sizeof(cl_int)*_glob_sum_buf_size, glob_sum_buf); - for(uint32_t i = 0 ; i < _glob_sum_buf_size; ++i) { - std::cout << "glob_sum_buf[" << i << "] = " << glob_sum_buf[i] << std::endl; - } - -#endif -} -#endif // Disable GPU implementation - -void CLTopKV2::run_on_cpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - // const Window& w = _topkv2_kernel.window(); - - _input->map(q); - _values->map(q); - _indices->map(q); - - // int row_size = (w[0].end() - w[0].start()) / w[0].step(); - int row_size = _input->info()->tensor_shape()[0]; - int rank = _input->info()->num_dimensions(); - - if (rank > 2) - throw std::runtime_error("Not supported 
type."); - - int row_num = (rank == 2 ? _input->info()->tensor_shape()[1] : 1); - - if (_input->info()->data_type() == DataType::F32) - { - nnfw::rt::optimized_ops::TopK<float>(row_size, row_num, (float *)_input->buffer(), _k, - (int32 *)_indices->buffer(), (float *)_values->buffer()); - } - else if (_input->info()->data_type() == DataType::S32) - { - nnfw::rt::optimized_ops::TopK<int32_t>(row_size, row_num, (int32_t *)_input->buffer(), _k, - (int32 *)_indices->buffer(), - (int32_t *)_values->buffer()); - } - else if (_input->info()->data_type() == DataType::QASYMM8) - { - nnfw::rt::optimized_ops::TopK<uint8_t>(row_size, row_num, (uint8_t *)_input->buffer(), _k, - (int32 *)_indices->buffer(), - (uint8_t *)_values->buffer()); - } - else - { - throw std::runtime_error("Not supported type."); - } - - _input->unmap(q); - _values->unmap(q); - _indices->unmap(q); -} - -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp deleted file mode 100644 index 3215d01a7..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h" - -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include <cmath> -#include <memory> -#include <tuple> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_manager(std::move(memory_manager)), _function() -{ -} - -void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, - ICLTensor *output, const PadStrideInfo &deconv_info, - unsigned int invalid_right, unsigned int invalid_bottom, - const WeightsInfo &weights_info) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info, - invalid_right, invalid_bottom, weights_info); -} - -void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, - ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, - const PadStrideInfo &deconv_info, unsigned int invalid_right, - unsigned int invalid_bottom, const WeightsInfo &weights_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - switch (CLTransposeConvLayer::get_deconvolution_method(input->info(), weights->info(), nullptr, - output->info(), deconv_info, invalid_right, - invalid_bottom, weights_info)) - { - case DeconvolutionMethod::DIRECT: - { - auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>(); - f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right, - invalid_bottom, weights_info); - _function = std::move(f); - break; - } - case DeconvolutionMethod::GEMM: - { - auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager); - 
f->configure(compile_context, input, weights, bias, output, deconv_info); - _function = std::move(f); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported."); - break; - } -} - -Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, ITensorInfo *output, - const PadStrideInfo &deconv_info, unsigned int invalid_right, - unsigned int invalid_bottom, const WeightsInfo &weights_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - switch (CLTransposeConvLayer::get_deconvolution_method( - input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)) - { - case DeconvolutionMethod::DIRECT: - { - // Validate direct convolution layer - ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate( - input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)); - break; - } - case DeconvolutionMethod::GEMM: - { - // Validate gemm-based convolution layer - ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info)); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported."); - break; - } - - return Status{}; -} - -DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method( - const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, - ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right, - unsigned int invalid_bottom, const WeightsInfo &weights_info) -{ - ARM_COMPUTE_UNUSED(output, bias, weights_info); - - const DataLayout data_layout = input->data_layout(); - - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - if (weights->dimension(idx_w) != deconv_info.stride().first || - weights->dimension(idx_h) != deconv_info.stride().second || invalid_right != 0 || - invalid_bottom != 0) 
- { - return DeconvolutionMethod::DIRECT; - } - - return DeconvolutionMethod::GEMM; -} - -void CLTransposeConvLayer::run() -{ - prepare(); - _function->run(); -} - -void CLTransposeConvLayer::prepare() { _function->prepare(); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp deleted file mode 100644 index 80fbf359d..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/NEFunctionsEx.cpp +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arm_compute/runtime/NEON/NEFunctionsEx.h" - -// NOTE This empty file aims to validate "NEFunctionsEx.h". -// DO NOT REMOVE this file. diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp deleted file mode 100644 index 2752eb6aa..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEActivationLayerEx.h" - -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h" -#include "arm_compute/runtime/IRuntimeContext.h" -#include "support/MemorySupport.h" - -namespace arm_compute -{ -NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT - : INESimpleFunctionNoBorder(ctx) -{ -} -void NEActivationLayerEx::configure(ITensor *input, ITensor *output, - ActivationLayerInfo activation_info) -{ - auto k = support::cpp14::make_unique<NEActivationLayerKernelEx>(); - k->configure(input, output, activation_info); - _kernel = std::move(k); -} - -Status NEActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfo &act_info) -{ - return NEActivationLayerKernelEx::validate(input, output, act_info); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp deleted file mode 100644 index 2fc94b267..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h" -#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h> - -#include "arm_compute/core/ITensor.h" -#include "support/MemorySupport.h" - -#include <utility> - -namespace arm_compute -{ - -template <BinaryLogicalOperation COP> -void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2, - ITensor *output) -{ - auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); - k->configure(COP, input1, input2, output); - _kernel = std::move(k); -} - -template <BinaryLogicalOperation COP> -Status NEBinaryLogicalOperationStatic<COP>::validate(const ITensorInfo *input1, - const ITensorInfo *input2, - const ITensorInfo *output) -{ - return NEBinaryLogicalOperationKernel::validate(COP, input1, input2, output); -} - -void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output, - BinaryLogicalOperation op) -{ - auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); - k->configure(op, input1, input2, output); - _kernel = std::move(k); -} - -Status NEBinaryLogicalOperation::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, BinaryLogicalOperation op) -{ - return NEBinaryLogicalOperationKernel::validate(op, input1, input2, output); -} - -// Supported Specializations -template class NEBinaryLogicalOperationStatic<BinaryLogicalOperation::AND>; -template class NEBinaryLogicalOperationStatic<BinaryLogicalOperation::OR>; -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp deleted file mode 100644 index 6ad3e1b12..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NECastBool.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/NEON/functions/NECastBool.h" - -#include "arm_compute/core/NEON/kernels/NECastBoolKernel.h" -#include "support/MemorySupport.h" - -using namespace arm_compute; - -void NECastBool::configure(const ITensor *input, ITensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<NECastBoolKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} - -Status NECastBool::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - return NECastBoolKernel::validate(input, output); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp deleted file mode 100644 index e0ab3e025..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h" - -#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" -#include "support/MemorySupport.h" - -using namespace arm_compute; - -void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups) -{ - auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>(); - k->configure(input, output, lookups); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp deleted file mode 100644 index a123439d9..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -#include <algorithm> -#include <cmath> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -namespace -{ -Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) -{ - ARM_COMPUTE_RETURN_ON_ERROR( - NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); - - return Status{}; -} -} // namespace - -void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output) -{ - auto k = support::cpp14::make_unique<NETransposeKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} - -Status NEFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *input, - const ITensorInfo *output) -{ - return NETransposeKernel::validate(input, output); -} - -NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer( - std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(), - _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(), - _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false), - _accumulate_biases(false), _is_prepared(false) -{ -} - -void NEFullyConnectedHybridLayer::configure_mm(const ITensor *input, const ITensor *weights, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); - - // Configure gemmlowp function - _mm_gemmlowp.configure(input, weights, nullptr, output); -} - -void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor 
*weights, - const ITensor *biases, ITensor *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - // Perform validate step - ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); - - _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; - _accumulate_biases = false; - _original_weights = weights; - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr) - { - _accumulate_biases = true; - - // Configure accumulate biases kernel - _accumulate_biases_kernel.configure(output, biases); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensor *weights_to_use = weights; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->info()->dimension(1) > 1; - bool _is_fc_after_conv; - if (is_batched_fc_layer) - { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); - } - else - { - _is_fc_after_conv = input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1; - } - ARM_COMPUTE_ERROR_ON_MSG(_is_fc_after_conv, - "NEFullyConnectedHybridLayer does not support after conv"); - (void)_is_fc_after_conv; - - // Reshape weights if needed - if (!_are_weights_reshaped) - { - // Reshape the weights - _reshape_weights_output.allocator()->init( - weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - 
compute_transposed_shape(*weights->info()))); - _reshape_weights_function.configure(weights_to_use, &_reshape_weights_output); - weights_to_use = &_reshape_weights_output; - } - - // Quantize input - _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); - _scale_factor.allocator()->init( - TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32)); - _quant_input_kernel.configure(input, &_quantized_input, &_scale_factor); - - // GEMM - _gemmlowp_output.allocator()->init( - output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); - configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output); - - // Multiply scale - _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output, - weights->info()->quantization_info().uniform().scale); - - _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; - - _quantized_input.allocator()->allocate(); - _scale_factor.allocator()->allocate(); - _gemmlowp_output.allocator()->allocate(); -} - -Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); - ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2); - - bool weights_reshaped = fc_info.transpose_weights ? 
fc_info.are_weights_reshaped : true; - - const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAccumulateBiasesKernel::validate(output, biases)); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensorInfo *weights_to_use = weights; - - if (!weights_reshaped) - { - // Validate reshape weights kernel - ARM_COMPUTE_RETURN_ON_ERROR( - NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights)); - weights_to_use = &reshaped_weights; - } - - // Fully Connected layer after a Fully Connected Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); - - // Validate quantization kernel - const ITensorInfo &quantized_input = - TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); - const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32); - ARM_COMPUTE_RETURN_ON_ERROR( - NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor)); - - const ITensorInfo &gemmlowp_output = TensorInfo( - output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); - // Validate matrix multiply kernel - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output)); - - 
ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate( - &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale)); - - return Status{}; -} - -void NEFullyConnectedHybridLayer::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Quantize input - NEScheduler::get().schedule(&_quant_input_kernel, Window::DimY); - - // Run matrix multiply - _mm_gemmlowp.run(); - - // Multiply scale factor - NEScheduler::get().schedule(&_multiply_scale_kernel, Window::DimY); - - // Accumulate biases if provided - if (_accumulate_biases) - { - NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY); - } -} - -void NEFullyConnectedHybridLayer::prepare() -{ - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - auto release_unused = [](Tensor *w) { - if (!w->is_used()) - { - w->allocator()->free(); - } - }; - - // Reshape of the weights (happens only once) - if (!_are_weights_reshaped) - { - // Run reshape weights kernel and mark weights as unused - _reshape_weights_output.allocator()->allocate(); - _reshape_weights_function.run(); - - _are_weights_reshaped = true; - // We can not release _original_weights because it can be used in other nodes - } - - // Prepare GEMM prepare and release unused weights - _mm_gemmlowp.prepare(); - - // Release reshaped weights if unused - release_unused(&_reshape_weights_output); - - _is_prepared = true; - } -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp deleted file mode 100644 index cb7557a5a..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -#include <algorithm> -#include <cmath> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -namespace -{ -Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) -{ - if (is_data_type_quantized_asymmetric(input.data_type())) - { - // Since we need negative offsets for computing convolution, we need to change - // QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale, - -input.quantization_info().uniform().offset); - const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale, - -weights.quantization_info().uniform().offset); - - // Validate gemmlowp function - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate( - &input.clone()->set_quantization_info(input_quantization_info), - &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output)); - } - else - { - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate( - &input, &weights, nullptr, &output, 1.f, 0.0f, - GEMMInfo(false, false, false /* Reshape weights only for the first run */))); - } - - return Status{}; -} -} // namespace - -NEFullyConnectedLayerEx::NEFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(), - _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(), - _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(), - _converted_weights_output(), 
_reshape_weights_output(), _original_weights(nullptr), - _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), - _accumulate_biases(false), _is_quantized(false), _is_prepared(false) -{ -} - -void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *weights, - ITensor *output) -{ - if (_is_quantized) - { - // Since we need negative offsets for computing convolution, we need to change - // QuantizationInfo() - // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info = input->info()->quantization_info(); - const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - - input->info()->set_quantization_info(QuantizationInfo( - input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); - weights->info()->set_quantization_info(QuantizationInfo( - weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); - - // Configure gemmlowp function - _mm_gemmlowp.configure(input, weights, nullptr, output); - - // Revert back QuantizatioInfo as input and weights could be used in other fully connected - // layers - input->info()->set_quantization_info(input_quantization_info); - weights->info()->set_quantization_info(weights_quantization_info); - } - else - { - // Configure matrix multiply kernel - _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, - GEMMInfo(false, false, false /* Reshape weights only for the first run */)); - } -} - -void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITensor *weights, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON( - (weights->info()->dimension(1) != - (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); - - // If the fully connected layer is called after a convolution layer, the input tensor must be - // linearized - - // Initialize output tensor for flatten - TensorShape shape_flatten = 
compute_flatten_shape(input->info()); - _flatten_output.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - shape_flatten)); - - // Configure flatten kernel - _memory_group.manage(&_flatten_output); - _flatten_kernel.configure(input, &_flatten_output); - - // Configure matrix multiply kernel - configure_mm(&_flatten_output, weights, output); - - // Allocate the output tensor for flatten once all the configure methods have been called - _flatten_output.allocator()->allocate(); -} - -void NEFullyConnectedLayerEx::configure_fc_fc(const ITensor *input, const ITensor *weights, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); - - // Configure matrix multiply kernel - configure_mm(input, weights, output); -} - -void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *weights, - const ITensor *biases, ITensor *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - // Perform validate step - ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); - - _are_weights_converted = true; - _are_weights_reshaped = fc_info.transpose_weights ? 
fc_info.are_weights_reshaped : true; - _is_fc_after_conv = true; - _accumulate_biases = false; - _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); - _original_weights = weights; - - // Configure gemmlowp output - if (_is_quantized) - { - _gemmlowp_output.allocator()->init( - output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::S32)); - } - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr && !_is_quantized) - { - _accumulate_biases = true; - - // Configure accumulate biases kernel - _accumulate_biases_kernel.configure(output, biases); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensor *weights_to_use = weights; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->info()->dimension(1) > 1; - if (is_batched_fc_layer) - { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); - } - else - { - _is_fc_after_conv = input->info()->num_dimensions() > 1; - } - - // Reshape weights if needed - if (!_are_weights_reshaped) - { - // Reshape the weights - _reshape_weights_function.configure(weights, &_reshape_weights_output); - weights_to_use = &_reshape_weights_output; - } - - // Convert weights if needed - if (_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout)) - { - // Convert weights - _convert_weights.configure(weights_to_use, &_converted_weights_output, - input->info()->tensor_shape(), fc_info.weights_trained_layout); 
- - weights_to_use = &_converted_weights_output; - _are_weights_converted = false; - } - - ITensor *tmp_output = (_is_quantized) ? &_gemmlowp_output : output; - if (_is_fc_after_conv) - { - // Fully Connected layer after a Convolution Layer without batches - configure_conv_fc(input, weights_to_use, tmp_output); - } - else - { - // Fully Connected layer after a Fully Connected Layer without batches - configure_fc_fc(input, weights_to_use, tmp_output); - } - - // Configure output stage for asymmetric quantized types - if (_is_quantized) - { - float multiplier = input->info()->quantization_info().uniform().scale * - weights->info()->quantization_info().uniform().scale / - output->info()->quantization_info().uniform().scale; - int output_multiplier; - int output_shift; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, - &output_shift); - _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, - output_shift, - output->info()->quantization_info().uniform().offset); - _gemmlowp_output.allocator()->allocate(); - } - - _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; -} - -Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *biases, const ITensorInfo *output, - FullyConnectedLayerInfo fc_info) -{ - ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); - - bool weights_reshaped = fc_info.transpose_weights ? 
fc_info.are_weights_reshaped : true; - bool is_fc_after_conv = true; - bool is_quantized = is_data_type_quantized_asymmetric(input->data_type()); - - const ITensorInfo &flatten_input = - TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_flatten_shape(input))); - const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); - const ITensorInfo &converted_weights = - weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) - : TensorInfo(*reshaped_weights.clone()); - const ITensorInfo &gemmlowp_output = TensorInfo( - output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); - - // Configure accumulate biases kernel for non quantized asymmetric types - if (biases != nullptr && !is_quantized) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAccumulateBiasesKernel::validate(output, biases)); - } - - // With the Fully Connected layer we can have 4 different cases: - // 1) Convolution layer -> Fully Connected layer without batches - // 2) Fully Connected layer -> Fully Connected layer without batches - // 3) Convolution layer -> Fully Connected layer with batches - // 4) Fully Connected layer -> Fully Connected layer with batches - - const ITensorInfo *input_to_use = input; - const ITensorInfo *weights_to_use = weights; - const ITensorInfo *tmp_output = (is_quantized) ? 
&gemmlowp_output : output; - - // Check if we have a fully connected layer with batches - const bool is_batched_fc_layer = output->dimension(1) > 1; - - if (is_batched_fc_layer) - { - is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(), - output->tensor_shape().cbegin() + 1)); - } - else - { - is_fc_after_conv = input->num_dimensions() > 1; - } - - if (!weights_reshaped) - { - // Validate reshape weights kernel - ARM_COMPUTE_RETURN_ON_ERROR( - NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights)); - weights_to_use = &reshaped_weights; - } - - if (is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout)) - { - // Validate convert weights kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate( - weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout)); - weights_to_use = &converted_weights; - } - - if (is_fc_after_conv) - { - // Fully Connected layer after a Convolution Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON( - (weights_to_use->dimension(1) != - (input->dimension(0) * input->dimension(1) * input->dimension(2)))); - - // Validate flatten kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input)); - input_to_use = &flatten_input; - } - else - { - // Fully Connected layer after a Fully Connected Layer without batches - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); - } - // Validate matrix multiply kernel - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*input_to_use, *weights_to_use, *tmp_output)); - - // Validate output stage for asymmetric quantized types - if (is_quantized) - { - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate( - &gemmlowp_output, biases, output)); - } - - return Status{}; -} - -void NEFullyConnectedLayerEx::run() -{ - if (!_is_prepared) - { - if 
(!_are_weights_reshaped) - _reshape_weights_output.allocator()->allocate(); - if (!_are_weights_converted) - _converted_weights_output.allocator()->allocate(); - _is_prepared = true; - } - - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - // Reshape of the weights - if (!_are_weights_reshaped) - { - _reshape_weights_function.run(); - } - - // Convert weights if needed - if (!_are_weights_converted) - { - _convert_weights.run(); - } - - // Prepare GEMM prepare - if (!_is_quantized) - { - _mm_gemm.prepare(); - } - } - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Linearize input if it comes from a convolutional layer - if (_is_fc_after_conv) - { - NEScheduler::get().schedule(&_flatten_kernel, Window::DimY); - } - - // Run matrix multiply - if (_is_quantized) - { - _mm_gemmlowp.run(); - } - else - { - _mm_gemm.run(); - } - - // Accumulate biases if provided - if (_is_quantized) - { - _gemmlowp_output_stage.run(); - } - else - { - if (_accumulate_biases) - { - NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY); - } - } -} - -void NEFullyConnectedLayerEx::prepare() -{ -#if 0 // TODO Remove this block - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - auto release_unused = [](Tensor *w) { - if (!w->is_used()) - { - w->allocator()->free(); - } - }; - - // Pointer to current weights - const ITensor *cur_weights = _original_weights; - - // Reshape of the weights (happens only once) - if (!_are_weights_reshaped) - { - // Run reshape weights kernel and mark weights as unused - _reshape_weights_output.allocator()->allocate(); - _reshape_weights_function.run(); - - cur_weights->mark_as_unused(); - cur_weights = &_reshape_weights_output; - _are_weights_reshaped = true; - } - - // Convert weights if needed (happens only once) - if (!_are_weights_converted) - { - _converted_weights_output.allocator()->allocate(); - _convert_weights.run(); - - cur_weights->mark_as_unused(); - _are_weights_converted = 
true; - } - - // Release reshaped weights if unused - release_unused(&_reshape_weights_output); - - // Prepare GEMM prepare and release unused weights - if (!_is_quantized) - { - _mm_gemm.prepare(); - } - - // Release converted weights if unused - release_unused(&_reshape_weights_output); - release_unused(&_converted_weights_output); - - _is_prepared = true; - } -#endif -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp deleted file mode 100644 index dc6c78478..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h" - -#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h> -#include <arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h> -#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h> - -using namespace arm_compute; - -void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input, - const arm_compute::ITensor *weights, - const arm_compute::ITensor *biases, - arm_compute::ITensor *output, bool needs_reshape, - const arm_compute::TensorShape &reshape, - KernelType kernel_type) -{ - _input = input; - _weights = weights; - _biases = biases; - _output = output; - _needs_reshape = needs_reshape; - - const ITensor *input_to_use = input; - if (_needs_reshape) - { - // reshape - auto_init_if_empty(*_neon_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape)); - _neon_reshape.configure(_input, &_neon_buffer); - input_to_use = &_neon_buffer; - } - - _neon_fc = [&]() { - if (kernel_type == KernelType::GENERAL) - { - auto fc = new arm_compute::NEFullyConnectedLayerEx{_memory_manager}; - fc->configure(input_to_use, _weights, _biases, _output); - return std::unique_ptr<arm_compute::IFunction>(fc); - } - else - { - assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS); - - bool is_hybrid = input->info()->data_type() == DataType::F32 && - (weights->info()->data_type() == DataType::S8 || - weights->info()->data_type() == DataType::QASYMM8_SIGNED); - - if (is_hybrid) - { - auto fc = new arm_compute::NEFullyConnectedHybridLayer{_memory_manager}; - ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info()); - const auto orgin_weights_data_type = weights_info->data_type(); - weights_info->set_data_type(DataType::QASYMM8_SIGNED); - fc->configure(input_to_use, _weights, _biases, _output); - weights_info->set_data_type(orgin_weights_data_type); - return std::unique_ptr<arm_compute::IFunction>(fc); - } - else - { - auto 
fc = new arm_compute::NEFullyConnectedLayer{_memory_manager}; - fc->configure(input_to_use, _weights, _biases, _output); - return std::unique_ptr<arm_compute::IFunction>(fc); - } - } - }(); - - // NOTE _neon_buffer is inaccessible from outside, and thus it is safe to invoke allocate here. - if (_needs_reshape) - { - _neon_buffer.allocator()->allocate(); - } -} - -void NEFullyConnectedReshapingLayer::run(void) -{ - if (_needs_reshape) - _neon_reshape.run(); - - _neon_fc->run(); -} - -void NEFullyConnectedReshapingLayer::prepare(void) { _neon_fc->prepare(); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp deleted file mode 100644 index 433c35d58..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEGatherEx.h" - -#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h" -#include "support/MemorySupport.h" - -#include <utility> - -namespace arm_compute -{ -void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis) -{ - auto k = support::cpp14::make_unique<NEGatherKernelEx>(); - k->configure(input, indices, output, axis); - _kernel = std::move(k); -} - -Status NEGatherEx::validate(const ITensorInfo *input, const ITensorInfo *indices, - const ITensorInfo *output, int axis) -{ - return NEGatherKernelEx::validate(input, indices, output, axis); -} - -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp deleted file mode 100644 index 52d58accf..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h" - -#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h" -#include "support/MemorySupport.h" - -using namespace arm_compute; - -void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, - ITensor *output, ITensor *hits) -{ - auto k = support::cpp14::make_unique<NEHashtableLookupKernel>(); - k->configure(lookups, keys, input, output, hits); - _kernel = std::move(k); -} - -Status NEHashtableLookup::validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits) -{ - return NEHashtableLookupKernel::validate(lookups, keys, input, output, hits); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp deleted file mode 100644 index 16d74e62d..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ -NEInstanceNormalizationLayerEx::NEInstanceNormalizationLayerEx( - std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false), - _permute_input(), _permute_output(), _permuted_input(), _permuted_output() -{ -} - -void NEInstanceNormalizationLayerEx::configure(ITensor *input, ITensor *output, ITensor *gamma, - ITensor *beta, float epsilon) -{ - const DataLayout data_layout = input->info()->data_layout(); - - // Configure Kernels - _is_nchw = data_layout == DataLayout::NCHW; - - if (!_is_nchw) - { - _memory_group.manage(&_permuted_input); - _memory_group.manage(&_permuted_output); - - // Configure the function to transform the input tensor from NHWC -> NCHW - _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U)); - _permuted_input.info()->set_data_layout(DataLayout::NCHW); - - _normalization_kernel.configure(&_permuted_input, &_permuted_output, gamma, beta, epsilon); - _permuted_output.info()->set_data_layout(DataLayout::NCHW); - - _permute_output.configure(&_permuted_output, output != nullptr ? 
output : input, - PermutationVector(2U, 0U, 1U)); - _permuted_input.allocator()->allocate(); - _permuted_output.allocator()->allocate(); - } - else - { - _normalization_kernel.configure(input, output, gamma, beta, epsilon); - } -} - -Status NEInstanceNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *gamma, const ITensorInfo *beta, - float epsilon) -{ - return NEInstanceNormalizationLayerKernelEx::validate( - &input->clone()->set_data_layout(DataLayout::NCHW), - &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon); -} - -void NEInstanceNormalizationLayerEx::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - // Permute input - if (!_is_nchw) - { - _permute_input.run(); - } - - NEScheduler::get().schedule(&_normalization_kernel, Window::DimZ); - - // Permute output - if (!_is_nchw) - { - _permute_output.run(); - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp deleted file mode 100644 index 275c55024..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEOneHot.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/NEON/functions/NEOneHot.h" -#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h" -#include "support/MemorySupport.h" -#include <utility> -namespace arm_compute -{ -void NEOneHot::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, - const ITensor *off_value, ITensor *output, int axis) -{ - auto k = arm_compute::support::cpp14::make_unique<NEOneHotKernel>(); - k->configure(indices, depth, on_value, off_value, output, axis); - _kernel = std::move(k); -} -Status NEOneHot::validate(const ITensorInfo *indices, const ITensorInfo *depth, - const ITensorInfo *on_value, const ITensorInfo *off_value, - const ITensorInfo *output, int axis) -{ - return NEOneHotKernel::validate(indices, depth, on_value, off_value, output, axis); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp deleted file mode 100644 index aedb537e9..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/runtime/Tensor.h" - -using namespace arm_compute; - -NEReduceOperation::NEReduceOperation(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), - _reduction_ops(), _keep_dims() -{ -} - -Status NEReduceOperation::validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output, ReduceOperation op) -{ - ARM_COMPUTE_UNUSED(keep_dims); - ARM_COMPUTE_UNUSED(op); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions()); - - TensorShape out_shape = input->tensor_shape(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - const int input_dims = input->num_dimensions(); - Coordinates axis_local = reduction_axis; - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for (unsigned int i = 0; i < reduction_ops; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3); - ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local[i]) > - input->num_dimensions() - 1); - if (output->total_size() > 0 && keep_dims) - { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1); - } - if (keep_dims) - { - out_shape.set(axis_local[i], 1); - } - else - { - out_shape.remove_dimension(axis_local[i] - i); - } - } - const TensorInfo 
out_info = input->clone()->set_tensor_shape(out_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info); - - return Status{}; -} - -void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _reduction_ops = reduction_axis.num_dimensions(); - _reduction_kernels.resize(_reduction_ops); - _reduced_outs.resize(_reduction_ops - (keep_dims ? 1 : 0)); - _keep_dims = keep_dims; - - Coordinates axis_local = reduction_axis; - const int input_dims = input->info()->num_dimensions(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - // Perform reduction for every axis - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - TensorShape out_shape = - i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape(); - out_shape.set(axis_local[i], 1); - auto in = (i == 0) ? input : (&_reduced_outs[i - 1]); - - if (i == _reduction_ops - 1 && keep_dims) - { - _reduction_kernels[i].configure(in, output, axis_local[i], op); - } - else - { - _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), - input->info()->data_type(), - input->info()->quantization_info())); - _memory_group.manage(&_reduced_outs[i]); - _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i], op); - } - } - - // Allocate intermediate tensors - for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 
1 : 0); ++i) - { - _reduced_outs[i].allocator()->allocate(); - } - - // Configure reshape layer if we want to drop the dimensions - if (!keep_dims) - { - TensorShape out_shape = input->info()->tensor_shape(); - - // We have to sort the reduction axis vectors in order for remove_dimension - // to work properly - std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops); - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - out_shape.remove_dimension(axis_local[i] - i); - } - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape)); - _reshape.configure(&_reduced_outs[_reduction_ops - 1], output); - } -} - -void NEReduceOperation::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - _reduction_kernels[i].run(); - } - - if (!_keep_dims) - { - _reshape.run(); - } -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp deleted file mode 100644 index 26a887912..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEReduceSum.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute; - -NEReduceSum::NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), - _reduction_ops(), _keep_dims() -{ -} - -Status NEReduceSum::validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output) -{ - ARM_COMPUTE_UNUSED(keep_dims); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions()); - - TensorShape out_shape = input->tensor_shape(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - const int input_dims = input->num_dimensions(); - Coordinates axis_local = reduction_axis; - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for (unsigned int i = 0; i < reduction_ops; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3); - ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local[i]) > - input->num_dimensions() - 1); - if (output->total_size() > 0 && keep_dims) - { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1); - } - if (keep_dims) - { - out_shape.set(axis_local[i], 1); - } - else - { - out_shape.remove_dimension(axis_local[i] - i); - } - } - const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info); - - return Status{}; -} - 
-void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _reduction_ops = reduction_axis.num_dimensions(); - _reduction_kernels.resize(_reduction_ops); - _reduced_outs.resize(_reduction_ops - (keep_dims ? 1 : 0)); - _keep_dims = keep_dims; - - Coordinates axis_local = reduction_axis; - const int input_dims = input->info()->num_dimensions(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - // Perform reduction for every axis - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - TensorShape out_shape = - i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape(); - out_shape.set(axis_local[i], 1); - auto in = (i == 0) ? input : (&_reduced_outs[i - 1]); - - if (i == _reduction_ops - 1 && keep_dims) - { - _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::SUM); - } - else - { - _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), - input->info()->data_type(), - input->info()->quantization_info()) - .set_data_layout(input->info()->data_layout())); - _memory_group.manage(&_reduced_outs[i]); - _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i], - ReductionOperation::SUM); - } - } - - // Allocate intermediate tensors - for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 
1 : 0); ++i) - { - _reduced_outs[i].allocator()->allocate(); - } - - // Configure reshape layer if we want to drop the dimensions - if (!keep_dims) - { - TensorShape out_shape = input->info()->tensor_shape(); - - // We have to sort the reduction axis vectors in order for remove_dimension - // to work properly - std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops); - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - out_shape.remove_dimension(axis_local[i] - i); - } - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape)); - _reshape.configure(&_reduced_outs[_reduction_ops - 1], output); - } -} - -void NEReduceSum::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - _reduction_kernels[i].run(); - } - - if (!_keep_dims) - { - _reshape.run(); - } -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp deleted file mode 100644 index 2aa0d2d4b..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ -namespace -{ -/** Define dimension to split the window - * - * @param[in] axis Reduction axis - * - * @return The dimension to split the window - */ -size_t reduction_window_split_dimension(unsigned int axis) -{ - switch (axis) - { - case 0: - return Window::DimY; - case 1: - case 2: - case 3: - return Window::DimX; - default: - ARM_COMPUTE_ERROR("Unsupported reduction axis"); - } -} -} // namespace - -NEReductionOperationEx::NEReductionOperationEx() - : _reduction_kernel(), _fill_border_kernel(), _window_split(0), _reduction_axis() -{ -} - -Status NEReductionOperationEx::validate(const ITensorInfo *input, const ITensorInfo *output, - unsigned int axis, ReduceOperation op) -{ - ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperationKernelEx::validate(input, output, axis, op)); - - return Status{}; -} - -void NEReductionOperationEx::configure(ITensor *input, ITensor *output, unsigned int axis, - ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON( - NEReductionOperationEx::validate(input->info(), output->info(), axis, op)); - - // Configure reduction kernel - _reduction_kernel.configure(input, output, axis, op); - _window_split = reduction_window_split_dimension(axis); - _reduction_axis = axis; - - if (axis == 0) - { - // Configure fill border kernel - const BorderSize fill_border_size = _reduction_kernel.border_size(); - PixelValue pixelValue; - switch (op) - { - case ReduceOperation::MIN: - { - switch (input->info()->data_type()) - { - case DataType::F32: - { - pixelValue = PixelValue(std::numeric_limits<float>::max()); - break; - } - case DataType::F16: - { - pixelValue = PixelValue(static_cast<half>(65504.0f)); - break; - } - case DataType::QASYMM8: - { - pixelValue = - PixelValue(255, input->info()->data_type(), 
input->info()->quantization_info()); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported DataType"); - } - } - break; - } - case ReduceOperation::MAX: - { - switch (input->info()->data_type()) - { - case DataType::F32: - { - pixelValue = PixelValue(-std::numeric_limits<float>::max()); - break; - } - case DataType::F16: - { - pixelValue = PixelValue(static_cast<half>(-65504.0f)); - break; - } - case DataType::QASYMM8: - { - pixelValue = - PixelValue(0, input->info()->data_type(), input->info()->quantization_info()); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported DataType"); - } - } - break; - } - default: - ARM_COMPUTE_ERROR("Reduction Operation unsupported"); - } - _fill_border_kernel.configure(input, fill_border_size, BorderMode::CONSTANT, pixelValue); - } -} - -void NEReductionOperationEx::run() -{ - if (_reduction_axis == 0) - { - NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); - } - NEScheduler::get().schedule(&_reduction_kernel, _window_split); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp deleted file mode 100644 index aa165cc15..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/UtilsEx.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute::misc::shape_calculator; - -namespace arm_compute -{ - -NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _conv_f(), - _upsample_f(), - _flip_weights(), - _scaled_output(), - _weights_flipped(), - _flip_axis(), - _original_weights(nullptr), - _input(nullptr), - _info(), - _is_prepared(false) -{ -} - -Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, const ITensorInfo *output, - const PadStrideInfo &info, unsigned int invalid_right, - unsigned int invalid_bottom) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, - DataType::QASYMM8, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); - const unsigned int width_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); - const unsigned int height_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1); - - auto out_dims = transposeconv_output_dimensions( - input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx), - weights->dimension(height_idx), info, invalid_right, invalid_bottom); - - 
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - if (bias != nullptr) - { - if (is_data_type_quantized_asymmetric(input->data_type())) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); - } - } - - if (output->tensor_shape().total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), - "Output's width is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), - "Output's height is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), - "Output's depth is invalid."); - } - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, - pad_bottom); - TensorInfo scale_out_info( - input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - const unsigned int batches_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES); - const unsigned int channel_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) != - scale_out_info.dimension(batches_idx)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != - scale_out_info.dimension(channel_idx)); - - ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, - 
conv_info, WeightsInfo())); - - return Status{}; -} - -void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, - ITensor *output, const PadStrideInfo &info, - unsigned int invalid_right, unsigned int invalid_bottom) -{ - // Perform validation step - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate( - input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); - - const DataLayout data_layout = input->info()->data_layout(); - const unsigned int width_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - auto out_dims = transposeconv_output_dimensions( - input->info()->dimension(width_idx), input->info()->dimension(height_idx), - weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info, - invalid_right, invalid_bottom); - - const TensorShape output_shape = - compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); - - _input = input; - _original_weights = weights; - _info = info; - _is_prepared = false; - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - - // Output auto initialization if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - - _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); - _memory_group.manage(&_scaled_output); - - _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); - - // setup the 
function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); - - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::FLOOR); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - scale_out_info.set_data_layout(data_layout); - _scaled_output.allocator()->init(scale_out_info); - - _upsample_f.configure(input, &_scaled_output, upsample_info); - - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); - - // Setup flip axis data - _flip_axis.allocator()->allocate(); - auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer()); - axis_data[0] = static_cast<uint32_t>(width_idx); - axis_data[1] = static_cast<uint32_t>(height_idx); - - _scaled_output.allocator()->allocate(); -} - -void NETransposeConvLayer::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - _upsample_f.run(); - _conv_f.run(); -} - -void NETransposeConvLayer::prepare() -{ - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - // Run weights flipping and mark original weights tensor as unused - _weights_flipped.allocator()->allocate(); - _flip_weights.run(); - _original_weights->mark_as_unused(); - - // Prepare convolution - _conv_f.prepare(); - - _is_prepared = true; - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/topk_v2.h b/compute/ARMComputeEx/src/runtime/topk_v2.h deleted file mode 100644 index f94effea1..000000000 --- a/compute/ARMComputeEx/src/runtime/topk_v2.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file topk_v2.h
 * @brief This file contains TopK method and TopContainer class for TopK operation
 * @ingroup COM_AI_RUNTIME
 */

#ifndef __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__
#define __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__

// Everything this header uses from the standard library, so it can be included on its own
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

using int32 = std::int32_t;

namespace nnfw
{
namespace rt
{
namespace optimized_ops
{
/**
 * @brief class to define TopK operation
 * @note  The following code is implemented and modified while referring to the TFLite
 *        topk_v2.cc file.
 *        TopK_v2 of NN Runtime supports TENSOR_FLOAT32, TENSOR_QUANT8_ASYMM, TENSOR_INT32 other
 *        than TFLite.
 *        (TFLite additionally supports kTfLiteInt64.)
 *
 *        The class collects the indexes of the top k values. Based on template
 *        tensorflow::gtl::TopN<> but, for optimization, it re-uses the same container.
 *
 *        Internally at most k + 1 indexes are stored: the k best ones arranged as a heap
 *        whose front is the worst of them, plus one scratch slot at the back.
 */
template <typename T> class TopContainer
{
public:
  /**
   * @brief Prevent default constructor of this class
   */
  TopContainer() = delete;
  /**
   * @brief Constructor with params
   * @param [in] k        The top k predictions; a negative value is treated as 0
   * @param [in] row_size Size of row in data; only used to pre-size the container
   */
  TopContainer(int32 k, int32 row_size)
    : k_(static_cast<std::size_t>(std::max<int32>(k, 0))), container_(), values_(nullptr)
  {
    // Clamp before converting: a negative int32 would turn into a huge unsigned request
    const std::size_t row_len = static_cast<std::size_t>(std::max<int32>(row_size, 0));
    container_.reserve(std::min(k_, row_len) + 1);
  }

  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers)
   * @param [in] topContainer To copy
   */
  TopContainer(const TopContainer &) = delete;
  /**
   * @brief Prevent instances of this class from being copied (As this class contains pointers)
   * @param [in] topContainer To copy
   * @return Reference of TopContainer
   */
  TopContainer &operator=(const TopContainer &) = delete;

  /**
   * @brief Start collecting
   * @param [in] values Row of values that the pushed indexes refer to; it is not copied, so
   *                    it has to stay valid until the result has been read
   * @return N/A
   */
  void start_collecting(const T *values)
  {
    values_ = values;
    container_.clear();
  }

  /**
   * @brief Push a value to be compared for topk
   * @param [in] a Index into the row given to start_collecting()
   * @return N/A
   */
  void push(int32 a)
  {
    const auto better = [this](int32 lhs, int32 rhs) { return compare_fun(lhs, rhs); };
    if (container_.size() <= k_)
    {
      container_.push_back(a);
      if (container_.size() == k_ + 1)
      {
        // First time k + 1 candidates are present: heapify and move the worst one to the back
        std::make_heap(container_.begin(), container_.end(), better);
        std::pop_heap(container_.begin(), container_.end(), better);
      }
    }
    else if (better(a, container_.front()))
    {
      // Overwrite the scratch slot, then evict the new worst candidate into it
      container_.back() = a;
      std::push_heap(container_.begin(), container_.end(), better);
      std::pop_heap(container_.begin(), container_.end(), better);
    }
  }

  /**
   * @brief Get sorted result from pushed values
   * @return Reference of vector with the collected indexes, best first (larger value first,
   *         smaller index first among equal values); it holds at most k entries
   */
  const std::vector<int32> &sorted_result()
  {
    const auto better = [this](int32 lhs, int32 rhs) { return compare_fun(lhs, rhs); };
    if (container_.size() <= k_)
    {
      // Fewer than k + 1 indexes were pushed, so no heap was ever built
      std::sort(container_.begin(), container_.end(), better);
    }
    else
    {
      // The back element is the evicted scratch slot and is not part of the result
      std::sort_heap(container_.begin(), container_.end() - 1, better);
      container_.resize(k_);
    }
    return container_;
  }

private:
  std::size_t k_;
  std::vector<int32> container_;
  const T *values_ = nullptr;

  /**
   * @brief Ordering used for the heap and the final sort
   * @return true when index a ranks before index b: its value is larger, or the values are
   *         equal and a is the smaller index
   */
  bool compare_fun(int32 a, int32 b) const
  {
    if (values_[b] < values_[a])
    {
      return true;
    }
    else if (values_[b] > values_[a])
    {
      return false;
    }
    else
    {
      return a < b;
    }
  }
};

/**
 * @brief Operates TopK operation with params
 * @param [in] row_size         Size of row in data
 * @param [in] num_rows         The number of rows in data
 * @param [in] data             To be operated in
 * @param [in] k                The top k predictions
 * @param [out] output_indexes  Indexes of targets in the top k predictions
 * @param [out] output_values   Values of targets in the top k predictions
 * @return N/A
 * @note Output row r starts at offset r * k. Each row receives min(k, row_size) entries;
 *       when k is larger than row_size the remaining slots of the row are left untouched.
 *       Nothing is written at all when k or row_size is not positive.
 */
template <typename T>
void TopK(int32 row_size, int32 num_rows, const T *data, int32 k, int32 *output_indexes,
          T *output_values)
{
  // A non-positive k must not reach the pointer arithmetic below (negative output offsets)
  if (k <= 0 || row_size <= 0)
  {
    return;
  }

  TopContainer<T> topc(k, row_size);
  for (int32 row = 0; row < num_rows; ++row)
  {
    // Offsets are widened before multiplying so large tensors do not overflow 32-bit int
    const std::ptrdiff_t in_offset = static_cast<std::ptrdiff_t>(row) * row_size;
    const std::ptrdiff_t out_offset = static_cast<std::ptrdiff_t>(row) * k;

    const T *values_row = data + in_offset;
    topc.start_collecting(values_row);
    for (int32 c = 0; c < row_size; ++c)
    {
      topc.push(c);
    }

    // Prepare output buffers.
    int32 *indexes_row = output_indexes + out_offset;
    T *output_row = output_values + out_offset;
    // We always assume that the output is sorted.
    const auto &top_k = topc.sorted_result();
    std::copy(top_k.begin(), top_k.end(), indexes_row);
    std::transform(top_k.begin(), top_k.end(), output_row,
                   [values_row](const int32 loc) { return values_row[loc]; });
  }
}

} // namespace optimized_ops
} // namespace rt
} // namespace nnfw

#endif // __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__