diff options
Diffstat (limited to 'libs')
228 files changed, 0 insertions, 24110 deletions
diff --git a/libs/.FORMATCHECKED b/libs/.FORMATCHECKED deleted file mode 100644 index e69de29bb..000000000 --- a/libs/.FORMATCHECKED +++ /dev/null diff --git a/libs/ARMComputeEx/CMakeLists.txt b/libs/ARMComputeEx/CMakeLists.txt deleted file mode 100644 index 2483fb55d..000000000 --- a/libs/ARMComputeEx/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -if("${TARGET_ARCH}" STREQUAL "x86_64") - return() -endif() - -nnfw_find_package(ARMCompute REQUIRED) - -set(ACL_EX_BASE ${CMAKE_SOURCE_DIR}/libs/ARMComputeEx) - -file(GLOB_RECURSE ACL_EX_SRCS "${ACL_EX_BASE}/*.cpp") - -# generate embeded cl_kernel -execute_process ( - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/libs/ARMComputeEx" - COMMAND bash -c "python resolve_includes.py" -) - -add_library(arm_compute_ex SHARED ${ACL_EX_SRCS}) -set_target_properties(arm_compute_ex PROPERTIES COMPILE_FLAGS "-DEMBEDDED_KERNELS=1") -target_include_directories(arm_compute_ex PUBLIC ${CMAKE_SOURCE_DIR}/libs/ARMComputeEx) -target_link_libraries(arm_compute_ex arm_compute_core) -install(TARGETS arm_compute_ex DESTINATION lib) diff --git a/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h deleted file mode 100644 index e4e752ef9..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLKernelLibraryEx.h - * @ingroup COM_AI_RUNTIME - * @brief This file is a cloned version of CLKernelLibrary.h in ACL. This file defines - * an interface for CLKernelLibrary.cpp which adds more OpenCL kernels on top of ACL. - */ - -#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ -#define __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ - -#include "arm_compute/core/CL/OpenCL.h" - -#include <map> -#include <set> -#include <string> -#include <utility> - -namespace arm_compute -{ - -/** - * @brief Class to build OpenCL kernels added from nnfw - * */ -class CLKernelLibraryEx -{ - using StringSet = std::set<std::string>; - -private: - /** - * @brief Construct a new CLKernelLibraryEx object - */ - CLKernelLibraryEx(); - -public: - /** - * @brief Prevent instances of this class from being copied. - */ - CLKernelLibraryEx(const CLKernelLibraryEx &) = delete; - - /** - * @brief Prevent instances of this class from being copied. - */ - const CLKernelLibraryEx &operator=(const CLKernelLibraryEx &) = delete; - - /** - * @brief Get the KernelLibrary singleton. - * @return The KernelLibrary instance - */ - static CLKernelLibraryEx &get(); - - /** - * @brief Initialise the kernel library. - * @param[in] kernel_path Path of the directory from which kernel sources are loaded. - * @param[in] context CL context used to create programs. - * @param[in] device CL device for which the programs are created. - * @return N/A - */ - void init(std::string kernel_path, cl::Context context, cl::Device device) - { - _kernel_path = std::move(kernel_path); - _context = std::move(context); - _device = std::move(device); - } - - /** - * @brief Set the path that the kernels reside in. - * @param[in] kernel_path Path of the directory from which kernel sources are loaded. 
- * @return N/A - */ - void set_kernel_path(const std::string &kernel_path) { _kernel_path = kernel_path; }; - - /** - * @brief Get the path that the kernels reside in. - * @return the path of kernel files - */ - std::string get_kernel_path() { return _kernel_path; }; - - /** - * @brief Get the source of the selected program. - * @param[in] program_name Program name. - * @return Source of the selected program. - */ - std::string get_program_source(const std::string &program_name); - - /** - * @brief Set the CL context used to create programs. - * @note Setting the context also resets the device to the - * first one available in the new context. - * @param[in] context A CL context. - * @return N/A - */ - void set_context(cl::Context context) - { - _context = std::move(context); - if (_context.get() == nullptr) - { - _device = cl::Device(); - } - else - { - const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>(); - - if (cl_devices.empty()) - { - _device = cl::Device(); - } - else - { - _device = cl_devices[0]; - } - } - } - - /** - * @brief Return associated CL context. - * @return A CL context. - */ - cl::Context &context() { return _context; } - - /** - * @brief Set the CL device for which the programs are created. - * @param[in] device A CL device. - * @return N/A - */ - void set_device(cl::Device device) { _device = std::move(device); } - - /** - * @brief Gets the CL device for which the programs are created. - * @return A CL device. - */ - cl::Device &get_device() { return _device; } - - /** - * @brief Return the device version - * @return The content of CL_DEVICE_VERSION - */ - std::string get_device_version(); - - /** - * @brief Create a kernel from the kernel library. - * @param[in] kernel_name Kernel name. - * @param[in] build_options_set Kernel build options as a set. - * @return The created kernel. 
- */ - Kernel create_kernel(const std::string &kernel_name, - const StringSet &build_options_set = {}) const; - - /** - * @brief Find the maximum number of local work items in a workgroup can be supported for the - * kernel. - * @param[in] kernel kernel object - */ - - size_t max_local_workgroup_size(const cl::Kernel &kernel) const; - /** - * @brief Return the default NDRange for the device. - * @return default NDRangeof the device - */ - cl::NDRange default_ndrange() const; - - /** - * @brief Clear the library's cache of binary programs - * @return N/A - */ - void clear_programs_cache() - { - _programs_map.clear(); - _built_programs_map.clear(); - } - - /** - * @brief Access the cache of built OpenCL programs - * @return program map data structure of which key is name of kernel and value is - * kerel source name. (*.cl) - */ - const std::map<std::string, cl::Program> &get_built_programs() const - { - return _built_programs_map; - } - - /** - * @brief Add a new built program to the cache - * @param[in] built_program_name Name of the program - * @param[in] program Built program to add to the cache - * @return N/A - */ - void add_built_program(const std::string &built_program_name, cl::Program program); - - /** - * @brief Returns true if FP16 is supported by the CL device - * @return true if the CL device supports FP16 - */ - bool fp16_supported() const; - - /** - * @brief Returns true if int64_base_atomics extension is supported by the CL device - * @return true if the CL device supports int64_base_atomics extension - */ - bool int64_base_atomics_supported() const; - -private: - /** - * @brief Load program and its dependencies. - * @param[in] program_name Name of the program to load. - */ - const Program &load_program(const std::string &program_name) const; - /** - * @brief Concatenates contents of a set into a single string. - * @param[in] s Input set to concatenate. - * @return Concatenated string. 
- */ - std::string stringify_set(const StringSet &s) const; - - cl::Context _context; /**< Underlying CL context. */ - cl::Device _device; /**< Underlying CL device. */ - std::string _kernel_path; /**< Path to the kernels folder. */ - mutable std::map<std::string, const Program> - _programs_map; /**< Map with all already loaded program data. */ - mutable std::map<std::string, cl::Program> - _built_programs_map; /**< Map with all already built program data. */ - static const std::map<std::string, std::string> - _kernel_program_map; /**< Map that associates kernel names with programs. */ - static const std::map<std::string, std::string> - _program_source_map; /**< Contains sources for all programs. - Used for compile-time kernel inclusion. >*/ -}; -} -#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/OpenCLEx.h b/libs/ARMComputeEx/arm_compute/core/CL/OpenCLEx.h deleted file mode 100644 index dbda354d6..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/OpenCLEx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_OPENCLEX_H__ -#define __ARM_COMPUTE_OPENCLEX_H__ - -#include <string> -#include <utility> - -/* Configure the Khronos C++ wrapper to target OpenCL 1.2: */ -#ifndef ARM_COMPUTE_NO_EXCEPTIONS -#define CL_HPP_ENABLE_EXCEPTIONS -#endif // ARM_COMPUTE_NO_EXCEPTIONS -#define CL_HPP_CL_1_2_DEFAULT_BUILD -#define CL_HPP_TARGET_OPENCL_VERSION 110 -#define CL_HPP_MINIMUM_OPENCL_VERSION 110 -#include <CL/cl2.hpp> - -namespace arm_compute -{ -/** Class for loading OpenCL symbols. */ -class CLSymbolsEx final -{ -private: - CLSymbolsEx() = default; - void load_symbols(void *handle); - -public: - /** Get the static instance of CLSymbols. - * - * @return The static instance of CLSymbols. - */ - static CLSymbolsEx &get(); - /** Load symbols from the given OpenCL library path. - * - * @param[in] library Path to the OpenCL library. - * - * @return True if loading the library is successful. - */ - bool load(const std::string &library); - /** Load symbols from any of the default OpenCL library names. - * - * @return True if loading any library is successful. 
- */ - bool load_default(); - -#define DECLARE_FUNCTION_PTR(func_name) std::function<decltype(func_name)> func_name##_ptr = nullptr - - DECLARE_FUNCTION_PTR(clGetEventInfo); - DECLARE_FUNCTION_PTR(clSetEventCallback); - -#undef DECLARE_FUNCTION_PTR - -private: - std::pair<bool, bool> _loaded{false, false}; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_OPENCLEX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h deleted file mode 100644 index 080cc47ef..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__ -#define __ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the activation layer kernel. 
*/ -class CLActivationLayerExKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLActivationLayerExKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLActivationLayerExKernel(const CLActivationLayerExKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLActivationLayerExKernel &operator=(const CLActivationLayerExKernel &) = delete; - /** Allow instances of this class to be moved */ - CLActivationLayerExKernel(CLActivationLayerExKernel &&) = default; - /** Allow instances of this class to be moved */ - CLActivationLayerExKernel &operator=(CLActivationLayerExKernel &&) = default; - /** Default destructor */ - ~CLActivationLayerExKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will - * store the result - * of the activation function. Data types supported: - * QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - */ - void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfoEx act_info); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLActivationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor - * will store the result - * of the activation function. Data types supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfoEx &act_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h deleted file mode 100644 index b91a26159..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLArgMinMaxKernel.h - * @brief This file defines CLArgMinMaxKernel - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CLARG_MIN_MAX_KERNEL_H__ -#define __ARM_COMPUTE_CLARG_MIN_MAX_KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define interface for the argminmax max kernel. - */ -class CLArgMinMaxKernel : public ICLKernel -{ -public: - /** - * @brief Default constructor. 
- */ - CLArgMinMaxKernel(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLArgMinMaxKernel to be copied - */ - CLArgMinMaxKernel(const CLArgMinMaxKernel &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLArgMinMaxKernel to be copied - * @return Reference of this instance - */ - CLArgMinMaxKernel &operator=(const CLArgMinMaxKernel &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLArgMinMaxKernel to be moved - */ - CLArgMinMaxKernel(CLArgMinMaxKernel &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLArgMinMaxKernel to be moved - * @return Reference of this instance - */ - CLArgMinMaxKernel &operator=(CLArgMinMaxKernel &&) = default; - /** - * @brief Initialise the kernel's input, output and border mode. - * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[out] output The output tensor, Data types supported: same as @p input. - * @param[in] argminmax_axis Axis to argminmax - * return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, const uint32_t argminmax_axis, - ArgOperation op); - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLArgMinMaxKernel - * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32. - * @param[in] output The output tensor info, Data types supported: same as @p input1. 
- * @param[in] argminmax_axis Axis to argminmax - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const uint32_t argminmax_axis, ArgOperation op); - - /* - * @brief Run CLArgMinMaxKernel op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - /* - * @brief Run CLArgMinMaxKernel op on CPU - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run_on_cpu(cl::CommandQueue &queue); - -private: - const ICLTensor *_input; - ICLTensor *_output; - uint32_t _argminmax_axis; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLargminmaxMAXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h deleted file mode 100644 index 9a765f310..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__ -#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the arithmetic subtraction kernel (support broadcasting) - * - * Arithmetic subtraction is computed by: - * @f[ output(x,y) = input1(x,y) - input2(x,y) @f] - */ -class CLArithmeticSubtractionExKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLArithmeticSubtractionExKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLArithmeticSubtractionExKernel(const CLArithmeticSubtractionExKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLArithmeticSubtractionExKernel &operator=(const CLArithmeticSubtractionExKernel &) = delete; - /** Allow instances of this class to be moved */ - CLArithmeticSubtractionExKernel(CLArithmeticSubtractionExKernel &&) = default; - /** Allow instances of this class to be moved */ - CLArithmeticSubtractionExKernel &operator=(CLArithmeticSubtractionExKernel &&) = default; - /** Default destructor */ - ~CLArithmeticSubtractionExKernel() = default; - - /** Initialise the kernel's inputs, output and convertion policy. - * - * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32. - * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), - * S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, - ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLArithmeticSubtractionExKernel - * - * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32. 
- * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32. - * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), - * S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, ConvertPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h deleted file mode 100644 index 1387897c9..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__ -#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform BATCH_TO_SPACE_ND operation */ -class CLBatchToSpaceNDKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchToSpaceNDKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchToSpaceNDKernel(const CLBatchToSpaceNDKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchToSpaceNDKernel &operator=(const CLBatchToSpaceNDKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchToSpaceNDKernel(CLBatchToSpaceNDKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchToSpaceNDKernel &operator=(CLBatchToSpaceNDKernel &&) = default; - /** Default destructor */ - ~CLBatchToSpaceNDKernel() = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, const int32_t *block_size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h deleted file mode 100644 index ab33d9d3a..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ -#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to return truth values of two input tensors for Binary Logical Op*/ -class CLBinaryLogicalOpKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBinaryLogicalOpKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - CLBinaryLogicalOpKernel(const CLBinaryLogicalOpKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLBinaryLogicalOpKernel &operator=(const CLBinaryLogicalOpKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBinaryLogicalOpKernel(CLBinaryLogicalOpKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBinaryLogicalOpKernel &operator=(CLBinaryLogicalOpKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input1 Source tensor1. - * @param[in] input2 Source tensor2. - * @param[out] output Output tensor. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, - BinaryLogicalOperation op); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; - -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h deleted file mode 100644 index 4c2feb903..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLCastKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLCastKernel class - */ - -#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__ -#define __ARM_COMPUTE_CLCASTKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define OpenCL kernel for cast operation - */ -class CLCastKernel : public ICLKernel -{ -public: - /** - * @brief Construct CLCastKernel object - */ - CLCastKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLCastKernel(const CLCastKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLCastKernel &operator=(const CLCastKernel &) = delete; - - /** - * @brief Construct CLCastKernel object using default move constructor - * @param[in] CLCastKernel object to move - */ - CLCastKernel(CLCastKernel &&) = default; - - /** - * @brief Allow instances of this class to be moved - * @param[in] CLCastKernel object to move - */ - CLCastKernel &operator=(CLCastKernel &&) = default; - - /** - * @brief Destruct this CLCastKernel object - */ - ~CLCastKernel() = default; - - /** - * @brief Initialise the kernel's input and output. - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. 
(Must be a valid region of - * the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h deleted file mode 100644 index f5f455993..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__ -#define __ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to check if values in both tensors are equal*/ -class CLComparisonOpKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLComparisonOpKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - CLComparisonOpKernel(const CLComparisonOpKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLComparisonOpKernel &operator=(const CLComparisonOpKernel &) = delete; - /** Allow instances of this class to be moved */ - CLComparisonOpKernel(CLComparisonOpKernel &&) = default; - /** Allow instances of this class to be moved */ - CLComparisonOpKernel &operator=(CLComparisonOpKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input1 Source tensor1. - * @param[in] input2 Source tensor2. - * @param[out] output Output tensor. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, - const ComparisonOperation &op); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h deleted file mode 100644 index 60ec7a82a..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ -#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform depthTospace operation */ -class CLDepthToSpaceKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthToSpaceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default; - /** Default destructor */ - ~CLDepthToSpaceKernel() = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h deleted file mode 100644 index da075db69..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLEmbeddingLookupKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLEmbeddingLookupKernel class - */ - -#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ -#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** -* @brief Class to perform EmbeddingLookup operation with opencl kernel -*/ -class CLEmbeddingLookupKernel : public ICLKernel -{ -public: - /** - * @brief Construct a CLEmbeddingLookupKernel object - * */ - CLEmbeddingLookupKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLEmbeddingLookupKernel(const CLEmbeddingLookupKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLEmbeddingLookupKernel &operator=(const CLEmbeddingLookupKernel &) = delete; - - /** - * @brief Construct a CLEmbeddingLookupKernel object by using default move constructor - * @param[in] CLEmbeddingLookupKernel object to move - * */ - CLEmbeddingLookupKernel(CLEmbeddingLookupKernel &&) = default; - - /** - * @brief Move assignment operator - * @param[in] CLEmbeddingLookupKernel object to move - * */ - CLEmbeddingLookupKernel &operator=(CLEmbeddingLookupKernel &&) = default; - - /** - * @brief Destruct this object - * */ - ~CLEmbeddingLookupKernel() = default; - - /** - * @brief Set the input and output of the kernel - * @param[in] input Source tensor. - * Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] lookups Lookups are 1D tensor that values are indices into the first - * dimension of input. - * Data types supported: S32. 
- * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLEmbeddingLookupKernel - * @param[in] input The input tensor info. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * @param[in] lookups Lookups info. Data types supported: S32. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *lookups); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /** Source tensor */ - ICLTensor *_output; /** Destination tensor */ - const ICLTensor *_lookups; /** Lookups tensor */ -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h deleted file mode 100644 index a6ea539f8..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLEXPKERNEL_H__ -#define __ARM_COMPUTE_CLEXPKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform an exponential operation */ -class CLExpKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLExpKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLExpKernel(const CLExpKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLExpKernel &operator=(const CLExpKernel &) = delete; - /** Allow instances of this class to be moved */ - CLExpKernel(CLExpKernel &&) = default; - /** Allow instances of this class to be moved */ - CLExpKernel &operator=(CLExpKernel &&) = default; - /** Default destructor */ - ~CLExpKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: F32. - * @param[out] output Destination tensor. Data type supported: F32. 
- */ - void configure(const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLEXPKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h deleted file mode 100644 index 7e35a80b0..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLGatherKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLGatherKernel class - */ - -#ifndef __ARM_COMPUTE_CLGATHERKERNEL_H__ -#define __ARM_COMPUTE_CLGATHERKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define an interface for the gather kernel. - */ -class CLGatherKernel : public ICLKernel -{ -public: - /** - * @brief Construct CLGatherKernel object - * */ - CLGatherKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- */ - CLGatherKernel(const CLGatherKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - */ - CLGatherKernel &operator=(const CLGatherKernel &) = delete; - - /** - * @brief Construct CLGatherKernel object by using default move constructor - * @param[in] CLGatherKernel object to move - */ - CLGatherKernel(CLGatherKernel &&) = default; - - /** - * @brief Move assignment operator - * @param[in] CLGatherKernel object to move - */ - CLGatherKernel &operator=(CLGatherKernel &&) = default; - - /** - * @brief Initialise the kernel's input, output and border mode. - * @param[in] input1 An input tensor. Data types supported: U8/S32/F32. - * @param[in] input2 An input tensor. Data types supported: S32. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * @return N/A - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLGatherKernel - * @param[in] input1 An input tensor. Data types supported: U8/S32/F32. - * @param[in] input2 An input tensor. Data types supported: S32. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). 
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLGATHERKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h deleted file mode 100644 index c3fc15637..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLHashtableLookupKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLHashtableLookupKernel class - */ - -#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ -#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" - -namespace arm_compute -{ -class ICLTensor; - -/** -* @brief Class to perform HashtableLookup operation with opencl kernel -*/ -class CLHashtableLookupKernel : public ICLKernel -{ -public: - /** - * @brief Construct a CLHashtableLookupKernel object - * */ - CLHashtableLookupKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLHashtableLookupKernel(const CLHashtableLookupKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLHashtableLookupKernel &operator=(const CLHashtableLookupKernel &) = delete; - - /** - * @brief Construct a CLHashtableLookupKernel object by using default move constructor - * @param[in] CLHashtableLookupKernel object to move - * */ - CLHashtableLookupKernel(CLHashtableLookupKernel &&) = default; - - /** - * @brief Move assignment operator - * @param[in] CLHashtableLookupKernel object to move - * */ - CLHashtableLookupKernel &operator=(CLHashtableLookupKernel &&) = default; - - /** - * @brief Destruct this object - * */ - ~CLHashtableLookupKernel() = default; - - /** - * @brief Set the input and output of the kernel - * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. - * @param[in] keys Keys 1D tensor. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits Hits 1D tensor. 
A boolean tensor that indicates whether the lookup hits - * (True) or not (False). Data types supported: U8/QASYMM8 - * @return N/A - */ - void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input, - ICLTensor *output, ICLTensor *hits); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLHashtableLookupKernel - * @param[in] lookups The lookups tensor info. Data types supported: S32. - * @param[in] keys The keys tensor info. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input The input tensor info. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output The output tensor. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits The hits tensor info. A boolean tensor that indicates whether the lookup - * hits - * (True) or not (False). Data types supported: U8/QASYMM8 - * @return a status - */ - static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). 
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_lookups; /** Lookups tensor */ - const ICLTensor *_keys; /** Keys tensor */ - const ICLTensor *_input; /** Source tensor */ - ICLTensor *_output; /** Destination tensor */ - ICLTensor *_hits; /** Hits tensor */ - std::unique_ptr<CLTensor> _lookup_indices{nullptr}; /** Lookup indices tensor */ -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h deleted file mode 100644 index ccbea147e..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__ -#define __ARM_COMPUTE_CLNEGKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a negation operation on tensor*/ -class CLNegKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLNegKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - CLNegKernel(const CLNegKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLNegKernel &operator=(const CLNegKernel &) = delete; - /** Allow instances of this class to be moved */ - CLNegKernel(CLNegKernel &&) = default; - /** Allow instances of this class to be moved */ - CLNegKernel &operator=(CLNegKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - */ - void configure(const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLNEGKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h deleted file mode 100644 index 181a6226a..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ -#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the normalization layer kernel. - */ -class CLNormalizationLayerExKernel : public ICLKernel -{ -public: - /** Constructor */ - CLNormalizationLayerExKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizationLayerExKernel(const CLNormalizationLayerExKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizationLayerExKernel &operator=(const CLNormalizationLayerExKernel &) = delete; - /** Default Move Constructor. */ - CLNormalizationLayerExKernel(CLNormalizationLayerExKernel &&) = default; - /** Default move assignment operator */ - CLNormalizationLayerExKernel &operator=(CLNormalizationLayerExKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: - * F16/F32. - * @param[out] output Destination tensor. Output will have the same number of dimensions as - * input. Data types supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, - * normalization size and other parameters. - */ - void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLNormalizationLayerKernel - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: - * F16/F32. - * @param[in] output Destination tensor. 
Output will have the same number of dimensions as - * input. Data types supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization - * size and other parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - BorderSize _border_size; - bool _is_in_map; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h deleted file mode 100644 index eff1b8bd5..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__ -#define __ARM_COMPUTE_CLPRELU_KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to calculate PReLU*/ -class CLPReLUKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPReLUKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLPReLUKernel(const CLPReLUKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLPReLUKernel &operator=(const CLPReLUKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPReLUKernel(CLPReLUKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPReLUKernel &operator=(CLPReLUKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor1. - * @param[in] alpha Source tensor2. - * @param[out] output Output tensor. - */ - void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - const ICLTensor *_alpha; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h deleted file mode 100644 index cbaa2adee..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h +++ /dev/null @@ -1,60 +0,0 @@ -/* -* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved -* Copyright (c) 2016-2018 ARM Limited. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -#ifndef __ARM_COMPUTE_CLPADLAYERKERNEL_H__ -#define __ARM_COMPUTE_CLPADLAYERKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform PAD operation */ -class CLPadLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPadLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPadLayerKernel(const CLPadLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPadLayerKernel(CLPadLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default; - /** Default destructor */ - ~CLPadLayerKernel() = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] pad_size Padding Size tensor. 
Data types supported : S32 - */ - void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ - ICLTensor *_pad_size; /**< Padding Size tensor */ -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLPADLAYERKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h deleted file mode 100644 index 3434deee8..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ -#define __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform tensor permutation. 
- * - * Permutes given a permutation vector - */ -class CLPermuteExKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPermuteExKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPermuteExKernel(const CLPermuteExKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPermuteExKernel &operator=(const CLPermuteExKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPermuteExKernel(CLPermuteExKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPermuteExKernel &operator=(CLPermuteExKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to permute. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLPermuteKernel - * - * @param[in] input First tensor input info. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Output tensor info. Data types supported: same as @p input. 
- * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - PermutationVector _perm; -}; -} // arm_compute -#endif /*__ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h deleted file mode 100644 index d579f5d8f..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLPixelWiseDivisionKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLPixelWiseDivisionKernel class - */ - -#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ -#define __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Interface for the pixelwise division kernel. 
- */ -class CLPixelWiseDivisionKernel : public ICLKernel -{ -public: - /** - * @brief Construct a CLPixelWiseDivisionKernel object - */ - CLPixelWiseDivisionKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - */ - CLPixelWiseDivisionKernel(const CLPixelWiseDivisionKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - */ - CLPixelWiseDivisionKernel &operator=(const CLPixelWiseDivisionKernel &) = delete; - - /** - * @brief Construct a CLPixelWiseDivisionKernel object by using move constructor - * @param[in] CLPixelWiseDivisionKernel object to move - */ - CLPixelWiseDivisionKernel(CLPixelWiseDivisionKernel &&) = default; - - /** - * @brief Allow instances of this class to be moved - * @param[in] CLPixelWiseDivisionKernel object to move - */ - CLPixelWiseDivisionKernel &operator=(CLPixelWiseDivisionKernel &&) = default; - - /** - * @brief Initialise the kernel's input, output and border mode. - * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. Note: - * U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after division. - * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest - * even. 
- * @return N/A - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLPixelWiseDivisionKernel - * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32. - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after division. - * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - - /** - * @brief The size of the border for that kernel - * @return The width in number of elements of the border. 
- */ - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h deleted file mode 100644 index a26a4a7fc..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLReduceOperationKernel.h - * @brief This file defines CLReduceOperationKernel class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ -#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define interface for the reduce operation kernel - */ -class CLReduceOperationKernel : public ICLKernel -{ -public: - /** - * @brief Default constructor - */ - CLReduceOperationKernel(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLReduceOperationKernel(const CLReduceOperationKernel &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLReduceOperationKernel &operator=(const CLReduceOperationKernel &) = delete; - /** - * @brief Allow instances of this class to be moved - */ - CLReduceOperationKernel(CLReduceOperationKernel &&) = default; - /** - * @brief Allow instances of this class to be moved - */ - CLReduceOperationKernel &operator=(CLReduceOperationKernel &&) = default; - /** - * @brief Default destructor - */ - ~CLReduceOperationKernel() = default; - - /** - * @brief Set the input and output tensors. - * @param[in] input Source tensor. Data types supported: U8/S32/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. - * @param[in] op Reduce operation to perform. - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, - ReduceOperation op); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLReduceOperationKernel. - * @param[in] input Source tensor info. Data types supported: U8/S32/F32. 
- * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. - * @param[in] op Reduce operation to perform. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis, - ReduceOperation op); - - /* - * @brief Run CLReduceOperationKernel op - * @param[in] window Window to be used for in_slice - * @param[in] queue CLQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - uint32_t _axis; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h deleted file mode 100644 index 68534f1ab..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ -#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform SPACE_TO_BATCH_ND operation */ -class CLSpaceToBatchNDKernel final : public ICLKernel -{ -public: - /** Default constructor */ - CLSpaceToBatchNDKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToBatchNDKernel(const CLSpaceToBatchNDKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToBatchNDKernel &operator=(const CLSpaceToBatchNDKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSpaceToBatchNDKernel(CLSpaceToBatchNDKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSpaceToBatchNDKernel &operator=(CLSpaceToBatchNDKernel &&) = default; - /** Default destructor */ - ~CLSpaceToBatchNDKernel() = default; - /** Initialise the kernel's input and output. - * - * @note The data layout of input and output must be the same. - * @note The number of dimensions of input and output must be 4, and `spatial` dimensions - * are height and width. - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32. - * Data layout supported: NCHW/NHWC - * @param[in] block_size Block size tensor. Data types supported: S32. - * @param[in] padding_size Padding size tensor. Data types supported: S32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32. 
- * Data layout supported: NCHW/NHWC - */ - void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size, - ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - const ICLTensor *_block_size; /**< Block size tensor */ - const ICLTensor *_padding_size; /**< Padding size tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; - -} // namespace arm_compute - -#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h deleted file mode 100644 index be845a549..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ -#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform spaceTodepth operation */ -class CLSpaceToDepthKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSpaceToDepthKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default; - /** Default destructor */ - ~CLSpaceToDepthKernel() = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h deleted file mode 100644 index a4c44e35d..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__ -#define __ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to return squared difference value of two tensors (x-y)^2*/ -class CLSquaredDifferenceKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSquaredDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLSquaredDifferenceKernel(const CLSquaredDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLSquaredDifferenceKernel &operator=(const CLSquaredDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSquaredDifferenceKernel(CLSquaredDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSquaredDifferenceKernel &operator=(CLSquaredDifferenceKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input1 Source tensor1. - * @param[in] input2 Source tensor2. - * @param[out] output Output tensor. 
- */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h deleted file mode 100644 index 6368c380e..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLStridedSliceExKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLStridedSliceExKernel class - */ - -#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ -#define __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** -* @brief Class to define an interface for the kernel to extract a strided slice of a tensor -*/ -class CLStridedSliceExKernel : public ICLKernel -{ -public: - /** - * @brief Construct a CLStridedSliceExKernel object - * */ - CLStridedSliceExKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLStridedSliceExKernel(const CLStridedSliceExKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLStridedSliceExKernel &operator=(const CLStridedSliceExKernel &) = delete; - - /** - * @brief Construct a CLStridedSliceExKernel object by using default move constructor - * @param[in] CLStridedSliceExKernel object to move - * */ - CLStridedSliceExKernel(CLStridedSliceExKernel &&) = default; - - /** - * @brief Move assignment operator - * @param[in] CLStridedSliceExKernel object to move - * */ - CLStridedSliceExKernel &operator=(CLStridedSliceExKernel &&) = default; - - /** - * @brief Destruct this object - * */ - ~CLStridedSliceExKernel() = default; - - /** - * @brief Set the input and output of the kernel - * @param[in] input Source tensor. Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] beginData The begin tensor. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] endData The end tensor. Data types supported: S32. - * The number of dimensions must be 1. 
- * The length must be the same as the number of dimensions of input. - * @param[in] strideData The stride tensor. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] beginMask Mask for begin - * @param[in] endMask Mask for end - * @param[in] shrinkAxisMask Mask for shrink axis. - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLStridedSliceExKernel - * @param[in] input The input tensor info. Data types supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * @param[in] begin The begin tensor info. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] end The end tensor info. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] stride The stride tensor info. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] beginMask Mask for begin - * @param[in] endMask Mask for end - * @param[in] shrinkAxisMask Mask for shrink axis. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *begin, const ITensorInfo *end, - const ITensorInfo *stride, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. 
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /** Source tensor */ - ICLTensor *_output; /** Destination tensor */ - ICLTensor *_beginData; /** Start indices of input tensor */ - ICLTensor *_endData; /** Stop indices of input tensor */ - ICLTensor *_stridesData; /** Strides tensor */ - int32_t _beginMask; /** Begin mask */ - int32_t _endMask; /** End mask */ - int32_t _shrinkAxisMask; /** Shrink axis mask */ -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h deleted file mode 100644 index eb2bad254..000000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h +++ /dev/null @@ -1,653 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLTopKV2Kernel.h - * @brief This file defines classes for TopKV2Kernel - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__ -#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -// these parameters can be changed -#define _ITEMS 16 // number of items in a group -#define _GROUPS 4 // the number of virtual processors is _ITEMS * _GROUPS -#define _HISTOSPLIT (_ITEMS * _GROUPS / 2) // number of splits of the histogram -#define PERMUT // store the final permutation -//////////////////////////////////////////////////////// - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define CLTopKV2Single - */ -class CLTopKV2Single : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2Single(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied - */ - CLTopKV2Single(const CLTopKV2Single &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied - * @return Reference of this instance - */ - CLTopKV2Single &operator=(const CLTopKV2Single &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved - */ - CLTopKV2Single(CLTopKV2Single &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved - * @return Reference of this instance - */ - CLTopKV2Single &operator=(CLTopKV2Single &&) = default; - - /** - * @brief Initialise kernel with params - * @param[in] input An input tensor - * @param[in] topk_values Values of the top k predictions - * @param[in] topk_indices Indices of the top k predictions - * @param[in] indices Indices - * @param[in] temp_stack Temp stack - * @param[in] k K of the top k predictions - * @param[in] n Number times to quick-sort - * return N/A - */ - void configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices, - cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n); - - /* - * @brief Run CLTopKV2Single op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_topk_values; - ICLTensor *_topk_indices; -}; - -/** - * @brief Class to define CLTopKV2Init - */ -class CLTopKV2Init : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2Init(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied - */ - CLTopKV2Init(const CLTopKV2Init &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied - * @return Reference of this instance - */ - CLTopKV2Init &operator=(const CLTopKV2Init &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved - */ - CLTopKV2Init(CLTopKV2Init &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved - * @return Reference of this instance - */ - CLTopKV2Init &operator=(CLTopKV2Init &&) = default; - - /** - * @brief Initialise kernel with params - * @param[in] input An input tensor - * @param[in] in_key_buf Buffer of input key - * @param[in] in_ind_buf Buffer of input index - * @param[in] n Number times to quick-sort - * return N/A - */ - void configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, int n); - - /* - * @brief Run CLTopKV2Init op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; -}; - -/** - * @brief Class to define CLRadixSortHistogram - */ -class CLRadixSortHistogram : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortHistogram(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied - */ - CLRadixSortHistogram(const CLRadixSortHistogram &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied - * @return Reference of this instance - */ - CLRadixSortHistogram &operator=(const CLRadixSortHistogram &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved - */ - CLRadixSortHistogram(CLRadixSortHistogram &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved - * @return Reference of this instance - */ - CLRadixSortHistogram &operator=(CLRadixSortHistogram &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] hist_buf Buffer of histogram - * @param[in] bits Number of bits to be used for radix sort - * @param[in] n Integer number size to sort - * return N/A - */ - void configure(cl::Buffer *hist_buf, int bits, int n); - - /** - * @brief Set pass - * @param[in] pass Passes made of in radix sort algorithm - * @param[in] in_key_buf Buffer of input key - * return N/A - */ - void setPass(int pass, cl::Buffer *in_key_buf) - { - _pass = pass; - _in_key_buf = in_key_buf; - } - - /* - * @brief Run CLRadixSortHistogram op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - int _pass; - cl::Buffer *_in_key_buf; -}; - -/** - * @brief Class to define CLRadixSortScanHistogram - */ -class CLRadixSortScanHistogram : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortScanHistogram(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied - */ - CLRadixSortScanHistogram(const CLRadixSortScanHistogram &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied - * @return Reference of this instance - */ - CLRadixSortScanHistogram &operator=(const CLRadixSortScanHistogram &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved - */ - CLRadixSortScanHistogram(CLRadixSortScanHistogram &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved - * @return Reference of this instance - */ - CLRadixSortScanHistogram &operator=(CLRadixSortScanHistogram &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] hist_buf Buffer of histogram - * @param[out] glob_sum_buf Buffer of global sum - * @param[in] bits Number of bits to be used for radix sort - * return N/A - */ - void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits); - - /* - * @brief Run CLRadixSortScanHistogram op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** - * @brief Class to define CLRadixSortGlobalScanHistogram - */ -class CLRadixSortGlobalScanHistogram : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortGlobalScanHistogram(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied - */ - CLRadixSortGlobalScanHistogram(const CLRadixSortGlobalScanHistogram &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied - * @return Reference of this instance - */ - CLRadixSortGlobalScanHistogram &operator=(const CLRadixSortGlobalScanHistogram &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved - */ - CLRadixSortGlobalScanHistogram(CLRadixSortGlobalScanHistogram &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved - * @return Reference of this instance - */ - CLRadixSortGlobalScanHistogram &operator=(CLRadixSortGlobalScanHistogram &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] glob_sum_buf Buffer of global sum - * @param[out] temp_buf Temp buffer to be used while RadixSortGlobalScanHistogram - * @param[in] bits Number of bits to be used for radix sort - * return N/A - */ - void configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, int bits); - - /* - * @brief Run CLRadixSortGlobalScanHistogram op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** - * @brief Class to define CLRadixSortPasteHistogram - */ -class CLRadixSortPasteHistogram : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortPasteHistogram(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied - */ - CLRadixSortPasteHistogram(const CLRadixSortPasteHistogram &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied - * @return Reference of this instance - */ - CLRadixSortPasteHistogram &operator=(const CLRadixSortPasteHistogram &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved - */ - CLRadixSortPasteHistogram(CLRadixSortPasteHistogram &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved - * @return Reference of this instance - */ - CLRadixSortPasteHistogram &operator=(CLRadixSortPasteHistogram &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] hist_buf Buffer of histogram - * @param[out] glob_sum_buf Buffer of global sum - * @param[in] bits Number of bits to be used for radix sort - * return N/A - */ - void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits); - - /* - * @brief Run CLRadixSortPasteHistogram op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** - * @brief Class to define CLRadixSortReorder - */ -class CLRadixSortReorder : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLRadixSortReorder(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). 
- * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied - */ - CLRadixSortReorder(const CLRadixSortReorder &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied - * @return Reference of this instance - */ - CLRadixSortReorder &operator=(const CLRadixSortReorder &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved - */ - CLRadixSortReorder(CLRadixSortReorder &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved - * @return Reference of this instance - */ - CLRadixSortReorder &operator=(CLRadixSortReorder &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] hist_buf Buffer of histogram - * @param[in] bits Number of bits to be used for radix sort - * @param[in] n Integer number size to sort - * return N/A - */ - void configure(cl::Buffer *hist_buf, int bits, int n); - - /** - * @brief Set pass - * @param[in] pass Passes made of in radix sort algorithm - * @param[in] in_key_buf Buffer of input key - * @param[out] out_key_buf Buffer of output key - * @param[in] in_ind_buf Buffer of input index - * @param[out] out_ind_buf Buffer of output index - * return N/A - */ - void setPass(int pass, cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf, - cl::Buffer *out_ind_buf) - { - _pass = pass; - _in_key_buf = in_key_buf; - _out_key_buf = out_key_buf; - _in_ind_buf = in_ind_buf; - _out_ind_buf = out_ind_buf; - } - /* - * @brief Run CLRadixSortReorder op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - int _pass; - cl::Buffer 
*_in_key_buf; - cl::Buffer *_out_key_buf; - cl::Buffer *_in_ind_buf; - cl::Buffer *_out_ind_buf; -}; - -/** - * @brief Class to define CLTopKV2FindFirstNegative - */ -class CLTopKV2FindFirstNegative : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2FindFirstNegative(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied - */ - CLTopKV2FindFirstNegative(const CLTopKV2FindFirstNegative &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied - * @return Reference of this instance - */ - CLTopKV2FindFirstNegative &operator=(const CLTopKV2FindFirstNegative &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved - */ - CLTopKV2FindFirstNegative(CLTopKV2FindFirstNegative &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved - * @return Reference of this instance - */ - CLTopKV2FindFirstNegative &operator=(CLTopKV2FindFirstNegative &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] first_negative_idx_buf Buffer of the first negative index - * @param[in] n Number times to find - * return N/A - */ - void configure(cl::Buffer *first_negative_idx_buf, int n); - - /** - * @brief Set output buffer - * @param[out] out_key_buf Buffer of output key - * return N/A - */ - void setOutputBuffer(cl::Buffer *out_key_buf) { _out_key_buf = out_key_buf; } - - /* - * @brief Run CLTopKV2FindFirstNegative op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, 
cl::CommandQueue &queue) override; - -private: - cl::Buffer *_out_key_buf; -}; - -/** - * @brief Class to define CLTopKV2ReorderNegatives - */ -class CLTopKV2ReorderNegatives : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2ReorderNegatives(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied - */ - CLTopKV2ReorderNegatives(const CLTopKV2ReorderNegatives &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied - * @return Reference of this instance - */ - CLTopKV2ReorderNegatives &operator=(const CLTopKV2ReorderNegatives &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved - */ - CLTopKV2ReorderNegatives(CLTopKV2ReorderNegatives &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved - * @return Reference of this instance - */ - CLTopKV2ReorderNegatives &operator=(CLTopKV2ReorderNegatives &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] first_negative_idx_buf Buffer of the first negative index - * @param[in] n Number times to find - * return N/A - */ - void configure(cl::Buffer *first_negative_idx_buf, int n); - - /** - * @brief Set buffers - * @param[in] in_key_buf Buffer of input key - * @param[out] out_key_buf Buffer of output key - * @param[in] in_ind_buf Buffer of input index - * @param[out] out_ind_buf Buffer of output index - * return N/A - */ - void setBuffers(cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf, - cl::Buffer *out_ind_buf) - { - _in_key_buf = in_key_buf; - _out_key_buf = 
out_key_buf; - _in_ind_buf = in_ind_buf; - _out_ind_buf = out_ind_buf; - } - - /* - * @brief Run CLTopKV2ReorderNegatives op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - cl::Buffer *_in_key_buf; - cl::Buffer *_out_key_buf; - cl::Buffer *_in_ind_buf; - cl::Buffer *_out_ind_buf; -}; - -/** - * @brief Class to define CLTopKV2Store - */ -class CLTopKV2Store : public ICLKernel -{ -public: - /** - * @brief Constructor - */ - CLTopKV2Store(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied - */ - CLTopKV2Store(const CLTopKV2Store &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied - * @return Reference of this instance - */ - CLTopKV2Store &operator=(const CLTopKV2Store &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved - */ - CLTopKV2Store(CLTopKV2Store &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved - * @return Reference of this instance - */ - CLTopKV2Store &operator=(CLTopKV2Store &&) = default; - - /** - * @brief Initialise kernel with params - * @param[out] values Values tensor to store - * @param[out] indices Indices tensor to be used for store - * @param[in] k K of the top k predictions - * @param[in] n Number times to store - * return N/A - */ - void configure(ICLTensor *values, ICLTensor *indices, int k, int n); - - /** - * @brief Set buffers - * @param[out] out_key_buf Buffer of output key - * @param[out] out_ind_buf Buffer of output index - * return N/A 
- */ - void setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf); - - /* - * @brief Run CLTopKV2Store op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_values; - ICLTensor *_indices; - cl::Buffer *_out_key_buf; - cl::Buffer *_out_ind_buf; -}; - -} // namespace arm_compute - -#endif // __ARM_COMPUTE_CLTOPKV2KERNEL_H__ diff --git a/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h deleted file mode 100644 index f7bf72985..000000000 --- a/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYEREXKERNEL_H__ -#define __ARM_COMPUTE_NENORMALIZATIONLAYEREXKERNEL_H__ - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the normalization layer kernel. 
- */ -class NENormalizationLayerExKernel : public INEKernel -{ -public: - const char *name() const override { return "NENormalizationLayerKernel"; } - /** Default constructor */ - NENormalizationLayerExKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENormalizationLayerExKernel(const NENormalizationLayerExKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENormalizationLayerExKernel &operator=(const NENormalizationLayerExKernel &) = delete; - /** Default Move Constructor. */ - NENormalizationLayerExKernel(NENormalizationLayerExKernel &&) = default; - /** Default move assignment operator */ - NENormalizationLayerExKernel &operator=(NENormalizationLayerExKernel &&) = default; - /** Default destructor */ - ~NENormalizationLayerExKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types - * supported: FP16/F32. - * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a - * single input with dimensions [width, height, IFM], - * Data type supported: same as @p input - * @param[out] output Destination tensor. Output will have the same number of dimensions as - * input. Data type supported: same as @p input - * @param[in] norm_info Normalization layer information like the normalization type, - * normalization size and other parameters. - */ - void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, - NormalizationLayerInfo norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref - * NENormalizationLayerKernel - * - * @param[in] input Source tensor. 
3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types - * supported: FP16/F32. - * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a - * single input with dimensions [width, height, IFM], - * Data type supported: same as @p input - * @param[in] output Destination tensor. Output will have the same number of dimensions as - * input. Data type supported: same as @p input - * @param[in] norm_info Normalization layer information like the normalization type, - * normalization size and other parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, - const ITensorInfo *output, NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform normalization depending on the given template - * dimension. The second template parameter specifies whether the - * normalization has to be 1D or 2D. - * - * @note Only supported normalizations are: - * - 1D over X or Z - * - 2D over X and Y - * - * @param[in] window Region on which to execute the kernel. - */ - template <DataType dt, unsigned int dim, bool do_2D_norm> - void normalize_float(const Window &window); - - /** Common signature for all the specialised normalization functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using NormalizationFunctionEx = void (NENormalizationLayerExKernel::*)(const Window &window); - -private: - NormalizationFunctionEx _func; - const ITensor *_input; - const ITensor *_input_squared; - ITensor *_output; - NormalizationLayerInfo _norm_info; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NENORMALIZATIONLAYEREXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/TypesEx.h b/libs/ARMComputeEx/arm_compute/core/TypesEx.h deleted file mode 100644 index 8381f1cc6..000000000 --- a/libs/ARMComputeEx/arm_compute/core/TypesEx.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __ARM_COMPUTE_TYPESEX_H__ -#define __ARM_COMPUTE_TYPESEX_H__ - -#include <cmath> -#include <cstddef> -#include <cstdint> -#include <string> -#include <utility> - -namespace arm_compute -{ - -/** Available ArgIndex operations **/ -enum class ArgOperation -{ - MAX, - MIN, -}; - -/** Available reduce operations */ -enum class ReduceOperation -{ - MAX, /**< Max */ - MEAN, /**< Mean */ - SUM, /**< Sum */ - MIN, /**< Min */ -}; - -/** Available binary logical operations */ -enum class BinaryLogicalOperation -{ - AND, /**< AND */ - OR, /**< OR */ -}; - -enum class ComparisonOperation -{ - EQUAL, /**< EQUAL */ - NOT_EQUAL, /**< NOT_EQUAL */ -}; - -/** Activation Layer Information class */ -class ActivationLayerInfoEx -{ -public: - /** Available activation functions */ - enum class ActivationFunction - { - RSQRT /**< Inverse Square root ( \f$ f(x) = \rsqrt{x} \f$ )*/ - }; - - ActivationLayerInfoEx() = default; - /** Default Constructor - * - * @param[in] f The activation function to use. - * @param[in] a (Optional) The alpha parameter used by some activation functions - * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, - * @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH). - * @param[in] b (Optional) The beta parameter used by some activation functions (@ref - * ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref - * ActivationFunction::TANH). 
- */ - ActivationLayerInfoEx(ActivationFunction f, float a = 0.0f, float b = 0.0f) - : _act(f), _a(a), _b(b), _enabled(true) - { - } - /** Get the type of activation function */ - ActivationFunction activation() const { return _act; } - /** Get the alpha value */ - float a() const { return _a; } - /** Get the beta value */ - float b() const { return _b; } - /** Check if initialised */ - bool enabled() const { return _enabled; } - -private: - ActivationFunction _act = {ActivationLayerInfoEx::ActivationFunction::RSQRT}; - float _a = {}; - float _b = {}; - bool _enabled = {false}; -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TYPESEX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/UtilsEx.h b/libs/ARMComputeEx/arm_compute/core/UtilsEx.h deleted file mode 100644 index 8dd68a0c3..000000000 --- a/libs/ARMComputeEx/arm_compute/core/UtilsEx.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_UTILSEX_H__ -#define __ARM_COMPUTE_UTILSEX_H__ - -#include "arm_compute/core/TypesEx.h" - -#include <cstdint> -#include <cstdlib> -#include <sstream> -#include <string> - -namespace arm_compute -{ -/** Translates a given activation function to a string. - * - * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string. 
- * - * @return The string describing the activation function. - */ -const std::string &string_from_activation_func_ex(ActivationLayerInfoEx::ActivationFunction act); -} -#endif /*__ARM_COMPUTE_UTILSEX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h deleted file mode 100644 index 7e578550f..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__ -#define __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLActivationLayerExKernel - * - * @note The function simulates an activation layer with the specified activation function. - */ -class CLActivationLayerEx : public ICLSimpleFunction -{ -public: - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will - * be performed in-place - * - * @param[in, out] input Source tensor. 
In case of @p output tensor = nullptr, this tensor will - * store the result - * of the activation function. Data types supported: - * QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer parameters. - */ - void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfoEx act_info); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLActivationLayer - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor - * will store the result - * of the activation function. Data types supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfoEx &act_info); -}; -} -#endif /* __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h deleted file mode 100644 index 8044c58af..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLArgMinMax.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLArgMinMax class - */ - -#ifndef __ARM_COMPUTE_CLARG_MIN_MAX_H__ -#define __ARM_COMPUTE_CLARG_MIN_MAX_H__ - -#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to execute CLArgMinMax operation - */ -class CLArgMinMax : public IFunction -{ -public: - /** - * @brief Construct a new CLArgMinMax object - */ - CLArgMinMax(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLArgMinMax(const CLArgMinMax &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLArgMinMax &operator=(const CLArgMinMax &) = delete; - - /** - * @brief Construct a new CLArgMinMax object by using copy constructor - * @param[in] CLArgMinMax object to move - */ - CLArgMinMax(CLArgMinMax &&) = default; - - /** - * @brief Assign a CLArgMinMax object. - * @param[in] CLArgMinMax object to assign. This object will be moved. - */ - CLArgMinMax &operator=(CLArgMinMax &&) = default; - - /** - * @brief Initialise the kernel's inputs and outputs. - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[out] output The result of argminmaxMax operation. Data types supported: same as @p - * input. - * @param[in] axis Axis to argminmax. It must be sorted and no duplicates. - * @param[in] is_min True for ArgMin operation. - * @param[in] is_max Ture for ArgMax operation. 
- * @return N/A - */ - void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> argminmax_axis, - ArgOperation op); - - /** - * @brief Static function to check if given info will lead to a valid configuration - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[in] axis Axis to argminmax - * @param[out] output The result of argminmaxMax operation. Data types supported: same as @p - * input. - * @return a status - */ - static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis, - const ITensorInfo *output, ArgOperation op); - - /** - * @brief Run the kernels contained in the function - * This operation works on CPU on GPU depending on the value of argminmax_MAX_RUN_ON_CPU macro - * in CLArgMinMax.cpp. - * If argminmax_MAX_RUN_ON_CPU == 1, CPU runs this operation. - * Otherwise GPU runs this operation. - * @return N/A - */ - void run() override; - -private: - ICLTensor *_input; - ICLTensor *_output; - std::vector<uint32_t> _argminmax_axis; - ArgOperation _arg_op; - - std::unique_ptr<CLTensor[]> _interm_tensors{nullptr}; - std::unique_ptr<CLArgMinMaxKernel[]> _argminmax_kernels{nullptr}; - size_t _num_of_kernels; -}; -} -#endif /*__ARM_COMPUTE_CLargminmax_MAX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h deleted file mode 100644 index 34e6c6334..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__ -#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLArithmeticSubtractionExKernel - * - * @note The tensor data type for the inputs must be U8/S16/F16/F32. - * @note The function performs an arithmetic subtraction between two tensors. - */ -class CLArithmeticSubtractionEx : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and convertion policy. - * - * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified - * inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified - * inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), - * S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLArithmeticSubtractionEx - * - * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32. - * @param[in] input2 Second tensor input info. 
Data types supported: U8/S16/F16/F32. - * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), - * S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, ConvertPolicy policy); -}; -} -#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h deleted file mode 100644 index d16a0762d..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ -#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLBatchToSpaceNDKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLBatchToSpaceND : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. 
- * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] block_size A pointer to an array of integer values specifying block sizes - * for spatial dimension. - */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size); -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h deleted file mode 100644 index 061e34f26..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLBINARYLOGICALOP_H__ -#define __ARM_COMPUTE_CLBINARYLOGICALOP_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLBinaryLogicalOp : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input1 Source tensor1. Data types supported: U8, QASYMM8. - * @param[in] input2 Source tensor2. Data types supported: U8 QASYMM8. 
- * @param[out] output Output tensor. Data types supported: U8, QASYMM8. - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - BinaryLogicalOperation op); -}; - -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLBINARYLOGICALOP_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h deleted file mode 100644 index 56b8408e2..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLCast.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLCast class - */ - -#ifndef __ARM_COMPUTE_CLCAST_H__ -#define __ARM_COMPUTE_CLCAST_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLCastKernel. - * This converts the input tensor to the tensor of the output tensor's type. - */ -class CLCast : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's input and output - * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel. - * @param[out] output Output tensor. 
Data types supported: U8/QASYMM8/S16/S32/F16/F32. - */ - void configure(ICLTensor *input, ICLTensor *output); -}; -} -#endif /* __ARM_COMPUTE_CLCAST_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h deleted file mode 100644 index 1b0d70e7f..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLCOMPARISON_OP_H__ -#define __ARM_COMPUTE_CLCOMPARISON_OP_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLComparisonOp : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input1 Source tensor1. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] input2 Source tensor2. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. 
- */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - const ComparisonOperation &op); -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLCOMPARISON_OP_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h deleted file mode 100644 index d78a6ada4..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__ -#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLDepthToSpaceKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLDepthToSpace : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. 
- * @param[block_size] block size integer only - */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size); -}; -} // namesace arm_compute - -#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h deleted file mode 100644 index 257772a89..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLEmbeddingLookup.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLEmbeddingLookup class - */ - -#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ -#define __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include <vector> - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to perform EmbeddingLookup operation - */ -class CLEmbeddingLookup : public ICLSimpleFunction -{ -public: - /** - * @brief Set the input and output tensors. - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. 
- * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); -}; -} -#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h deleted file mode 100644 index 2d0fc23a4..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLEXP_H__ -#define __ARM_COMPUTE_CLEXP_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLExpKernel */ -class CLExp : public ICLSimpleFunction -{ -public: - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: F32. - * @param[out] output Destination tensor. Data type supported: F32. 
- */ - void configure(const ICLTensor *input, ICLTensor *output); -}; -} -#endif /* __ARM_COMPUTE_CLEXP_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h deleted file mode 100644 index f7fd3cda1..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLGather.h - * @brief This file contains CLGather class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CLGATHER_H__ -#define __ARM_COMPUTE_CLGATHER_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to to run @ref CLGatherKernel. - */ -class CLGather : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's inputs, output and convertion policy. - * @param[in] input1 An input tensor. Data types supported: U8/S32/F32. - * @param[in] input2 An indexes tensor. Data types supported: S32. - * @param[out] output The output tensor, Data types supported: same as @p input1. 
- * @return N/A - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); - - /** - * @brief Static function to check if given info will lead to a valid configuration - * of @ref CLGather - * @param[in] input1 An input tensor. Data types supported: U8/S32/F32. - * @param[in] input2 An indexes tensor. Data types supported: S32. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output); -}; -} -#endif /*__ARM_COMPUTE_CLGATHER_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h deleted file mode 100644 index 65aa6cbd5..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLHashtableLookup.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLHashtableLookup class - */ - -#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUP_H__ -#define __ARM_COMPUTE_CLHASHTABLELOOKUP_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include <vector> - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to perform HashtableLookup operation - */ -class CLHashtableLookup : public ICLSimpleFunction -{ -public: - /** - * @brief Set the input and output tensors. - * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of - * input. - * @param[in] keys Keys 1D tensor. keys and input pair represent a map. - * Data types supported: S32 - * @param[in] input Source tensor. - * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits - * (True) or not (False). Data types supported: U8/QASYMM8 - * @return N/A - */ - void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *intput, - ICLTensor *output, ICLTensor *hits); -}; -} -#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h deleted file mode 100644 index 198a0fd4e..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLNEG_H__ -#define __ARM_COMPUTE_CLNEG_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLNeg : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input Source tensor. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(ICLTensor *input, ICLTensor *output); -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLNEG_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h deleted file mode 100644 index 4077245d5..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__ -#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to compute a normalization layer. This function calls the following CL kernels: - * - * -# @ref CLFillBorderKernel - * -# @ref CLNormalizationLayerKernelEx - * - */ -class CLNormalizationLayerEx : public IFunction -{ -public: - /** Default constructor */ - CLNormalizationLayerEx(); - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types - * supported: F16/F32 (Written to by the border handler) - * @param[out] output Destination tensor. Dimensions, data type and number of channels must - * match the input ones. - * @param[in] norm_info Normalization layer information like the normalization type, - * normalization size and other parameters. - */ - void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLNormalizationLayer - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: - * F16/F32 - * @param[in] output Destination tensor. Dimensions, data type and number of channels must - * match the input ones. - * @param[in] norm_info Normalization layer information like the normalization type, normalization - * size and other parameters. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const NormalizationLayerInfo &norm_info); - - // Inherited methods overridden: - void run() override; - -private: - CLNormalizationLayerExKernel _norm_kernel; /**< Normalization layer kernel to run */ - CLFillBorderKernel _border_handler; /**< Kernel to handle borders */ -}; -} -#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h deleted file mode 100644 index 622a61b5e..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLPRELU_H__ -#define __ARM_COMPUTE_CLPRELU_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLPReLU : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input. Data types supported: - * QASYMM8/F16/F32. - * @param[in] alpha. Data types supported: - * QASYMM8/F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. 
- */ - void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output); -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLPRELU_H__*/ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h deleted file mode 100644 index d6ea486d1..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h +++ /dev/null @@ -1,47 +0,0 @@ -/* -* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved -* Copyright (c) 2016-2018 ARM Limited. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -#ifndef __ARM_COMPUTE_CLPADLAYEREX_H__ -#define __ARM_COMPUTE_CLPADLAYEREX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLPadLayerKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLPadLayerEx : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: - * U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: - * U8/QASYMM8/S16/S32/F16/F32. - * @param[in] pad_size Tensor for Padding values in NHWC format shape [n, 2], - * where n is the rank of tensor . 
Data types supported: S32 - */ - void configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size); -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLPADLAYEREX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h deleted file mode 100644 index 9a0cc213c..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLPERMUTEEX_H__ -#define __ARM_COMPUTE_CLPERMUTEEX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to execute an @ref CLPermuteKernel. */ -class CLPermuteEx : public ICLSimpleFunction -{ -public: - /** Set the input and output tensors. - * - * @param[in] input The input tensor to permute. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref CLPermute. - * - * @param[in] input First tensor input info. 
Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Output tensor info. Data types supported: same as @p input. - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm); -}; -} -#endif /*__ARM_COMPUTE_CLPERMUTEEX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h deleted file mode 100644 index b142d3a2e..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLPixelWiseDivision.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLPixelWiseDivision class - */ -#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__ -#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLPixelWiseDivisionKernel. - */ -class CLPixelWiseDivision : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's inputs, output and convertion policy. - * @param[in, out] input1 An input tensor. 
Data types supported: U8/S16/F16/F32 - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or - * 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest - * even. - * @return N/A - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f, - ConvertPolicy overflow_policy = ConvertPolicy::WRAP, - RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLPixelWiseDivision - * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. 
- * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale = 1.f, - ConvertPolicy overflow_policy = ConvertPolicy::WRAP, - RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO); -}; -} -#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h deleted file mode 100644 index e1a6f6ab4..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLReduceOperation.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLReduceOperation class - */ - -#ifndef __ARM_COMPUTE_CLREDUCEOPERATION_H__ -#define __ARM_COMPUTE_CLREDUCEOPERATION_H__ - -#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" -#include "arm_compute/core/TypesEx.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to perform ReduceOperation - */ -class CLReduceOperation : public IFunction -{ -public: - /** - * @brief Construct a new ReduceOperation object - */ - CLReduceOperation(); - - /** - * @brief Set the input and output tensors. - * @param[in] input Source tensor. Data types supported: U8/S32/F32 - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p - * input. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[in] op Reduce operation to perform. - * @return N/A - */ - void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis, - ReduceOperation op); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLReduceOperation. - * @param[in] input Source tensor info. Data types supported: U8/S32/F32 - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p - * input. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[in] op Reduce operation to perform. 
- * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const std::set<uint32_t> &axis, const ReduceOperation &op); - - /** - * @brief Run the OpenCL kernel for this operation - * @return N/A - */ - void run() override; - -private: - ICLTensor *_input; - ICLTensor *_output; - std::set<uint32_t> _axis; - - std::unique_ptr<CLTensor[]> _interm_tensors{nullptr}; - std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr}; -}; -} -#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h deleted file mode 100644 index 7e2df8986..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ -#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLSpaceToBatchNDKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/S32/F32. 
- * @note The function divides "spatial" dimensions of the input into a grid of blocks of shape - * block_shape, and interleaves these blocks with the "batch" dimension such that in the output. - */ -class CLSpaceToBatchND : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @note The data layout of input and output must be the same. - * @note The number of dimensions of input and output must be 4, and `spatial` dimensions - * are height and width. - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32. - * Data layout supported: NCHW/NHWC - * @param[in] block_size Tensor of integer values specifying block sizes for spatial - * dimension. - * Data types supported: S32 - * @param[in] padding_size Tensor of integer values specifying padding sizes for spatial - * dimension. - * Data types supported: S32 - * @param[out] output Output tensor. Data types supported: same as @p input. - * Data layout supported: NCHW/NHWC - */ - void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size, - ICLTensor *output); -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h deleted file mode 100644 index 17f762092..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__ -#define __ARM_COMPUTE_CLSPACETODEPTH_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLSpaceToDepthKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLSpaceToDepth : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[block_size] block size integer only - */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size); -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h deleted file mode 100644 index 3610ba71c..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__ -#define __ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLSquaredDifference : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input1 Source tensor1. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] input2 Source tensor2. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__*/ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h deleted file mode 100644 index 6b26a85c8..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLStridedSlice.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLStridedSlice and arm_compute::CLStridedSliceCPU class - */ - -#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ -#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLStridedSliceKernel - */ -class CLStridedSliceEx : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's inputs and outputs - * @param[in] input Tensor input. Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Output tensor. 
Data type supported: Same as @p input - * @param[in] beginData 'begin' vector of strided slice operation - * @param[in] endData 'end' vector of strided slice operation - * @param[in] stridesData 'strides' vector of strided slice operation - * @param[in] beginMask If the ith bit is set, begin[i] is ignored - * @param[in] endMask If the ith bit is set, end[i] is ignored - * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the - * dimensionality by 1, taking on the value at index begin[i] - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask); -}; -} -#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h deleted file mode 100644 index 5327e016f..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLTopKV2.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLTopKV2 class - */ -#ifndef __ARM_COMPUTE_CLTOPK_V2_H__ -#define __ARM_COMPUTE_CLTOPK_V2_H__ - -#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h" - -#include "arm_compute/runtime/IFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to execute TopKV2 operation. - */ -class CLTopKV2 : public IFunction -{ -public: - /** - * @brief Construct a new CLTopKV2 object - */ - CLTopKV2(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLTopKV2(const CLTopKV2 &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLTopKV2 &operator=(const CLTopKV2 &) = delete; - - /** - * @brief Construct a new CLTopKV2 object by using copy constructor - * @param[in] CLTopKV2 object to move - */ - CLTopKV2(CLTopKV2 &&) = default; - - /** - * @brief Assign a CLTopKV2 object. - * @param[in] CLTopKV2 object to assign. This object will be moved. - */ - CLTopKV2 &operator=(CLTopKV2 &&) = default; - - /** - * @brief Initialise the kernel's inputs and outputs. - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[in] k The value of `k`. - * @param[out] values Top k values. Data types supported: S32 if input type is U8/S16, F32 if - * input type is F32. - * @param[out] indices Indices related to top k values. Data types supported: S32 if input type - * is U8/S16, F32 if input type is F32. - * @return N/A - */ - void configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices, - int total_bits = 32, int bits = 4); - - /** - * @brief Run the kernels contained in the function - * Depending on the value of the following environment variables it works differently: - * - If the value of environment variable "ACL_TOPKV2" == "GPU_SINGLE", - * quick sort on GPU is used. 
- * - If the value of environment variable "ACL_TOPKV2" == ""GPU"", - * radix sort on GPU is used. - * - For other value, TopKV2 runs on CPU - * @return N/A - */ - void run() override; - -private: - void run_on_cpu(); - void run_on_gpu(); - void run_on_gpu_single_quicksort(); - - uint32_t _k; - uint32_t _total_bits; - uint32_t _bits; - uint32_t _radix; - uint32_t _hist_buf_size; - uint32_t _glob_sum_buf_size; - uint32_t _n; - - ICLTensor *_input; - ICLTensor *_values; - ICLTensor *_indices; - - cl::Buffer _qs_idx_buf; - cl::Buffer _qs_temp_buf; - cl::Buffer _hist_buf; - cl::Buffer _glob_sum_buf; - cl::Buffer _temp_buf; - cl::Buffer _first_negative_idx_buf; - cl::Buffer _in_key_buf; - cl::Buffer _out_key_buf; - cl::Buffer _in_ind_buf; - cl::Buffer _out_ind_buf; - - cl::Buffer *_p_in_key_buf; - cl::Buffer *_p_out_key_buf; - cl::Buffer *_p_in_ind_buf; - cl::Buffer *_p_out_ind_buf; - - CLTopKV2Single _qs_kernel; - CLTopKV2Init _init_kernel; - CLRadixSortHistogram _hist_kernel; - CLRadixSortScanHistogram _scan_hist_kernel; - CLRadixSortGlobalScanHistogram _glob_scan_hist_kernel; - CLRadixSortPasteHistogram _paste_hist_kernel; - CLRadixSortReorder _reorder_kernel; - CLTopKV2FindFirstNegative _find_first_negative_kernel; - CLTopKV2ReorderNegatives _reorder_negatives_kernel; - CLTopKV2Store _store_kernel; -}; -} -#endif // __ARM_COMPUTE_CLTOPK_V2_H__ diff --git a/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h deleted file mode 100644 index fa7408ecd..000000000 --- a/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYEREX_H__ -#define __ARM_COMPUTE_NENORMALIZATIONLAYEREX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" -#include "arm_compute/runtime/MemoryGroup.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to compute a normalization layer. This function calls the following NEON kernels: - * - * -# @ref NEPixelWiseMultiplicationKernel - * -# @ref NEFillBorderKernel - * -# @ref NENormalizationLayerKernelEx - * - */ -class NENormalizationLayerEx : public IFunction -{ -public: - /** Default constructor */ - NENormalizationLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data type supported: - * F16/F32 - * @param[out] output Destination with the same dimensions, data type and number of channels of - * @p input - * @param[in] norm_info Normalization layer information like the normalization type, - * normalization size and other parameters. 
- */ - void configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref - * NENormalizationLayer - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data type supported: - * F16/F32 - * @param[in] output Destination with the same dimensions, data type and number of channels of - * @p input - * @param[in] norm_info Normalization layer information like the normalization type, normalization - * size and other parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const NormalizationLayerInfo &norm_info); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function memory group */ - NENormalizationLayerExKernel _norm_kernel; /**< Normalization layer kernel */ - NEPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel */ - NEFillBorderKernel _border_handler; /**< Kernel to handle borders */ - Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ -}; -} -#endif /* __ARM_COMPUTE_NENORMALIZATIONLAYEREX_H__ */ diff --git a/libs/ARMComputeEx/resolve_includes.py b/libs/ARMComputeEx/resolve_includes.py deleted file mode 100644 index b3e252892..000000000 --- a/libs/ARMComputeEx/resolve_includes.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved -# Copyright (c) 2016, 2017 ARM Limited. 
-# -# SPDX-License-Identifier: MIT -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-import collections -import os.path -import re -import subprocess -import glob - - -def resolve_includes(target, source): - # File collection - FileEntry = collections.namedtuple('FileEntry', 'target_name file_contents') - - # Include pattern - pattern = re.compile("#include \"(.*)\"") - - # Get file contents - files = [] - for i in range(len(source)): - src = source[i] - dst = target[i] - f = open(src) - cts = f.read() - f.close() - contents = cts.splitlines() - entry = FileEntry(target_name=dst, file_contents=contents) - files.append((os.path.basename(src), entry)) - - # Create dictionary of tupled list - files_dict = dict(files) - - # Check for includes (can only be files in the same folder) - final_files = [] - for file in files: - done = False - tmp_file = file[1].file_contents - print(file[1].target_name) - while not done: - file_count = 0 - updated_file = [] - for line in tmp_file: - found = pattern.search(line) - if found: - include_file = found.group(1) - data = files_dict[include_file].file_contents - updated_file.extend(data) - else: - updated_file.append(line) - file_count += 1 - - # Check if all include are replaced. 
- if file_count == len(tmp_file): - done = True - - # Update temp file - tmp_file = updated_file - - # Append and prepend string literal identifiers and add expanded file to final list - tmp_file.insert(0, "R\"(\n") - tmp_file.append("\n)\"") - entry = FileEntry(target_name=file[1].target_name, file_contents=tmp_file) - final_files.append((file[0], entry)) - - # Write output files - for file in final_files: - with open(file[1].target_name, 'w+') as out_file: - out_file.write("\n".join(file[1].file_contents)) - - -# Generate embed files -cl_files = glob.glob('src/core/CL/cl_kernels/*.cl') -cl_files += glob.glob('src/core/CL/cl_kernels/*.h') - -# DEBUG: print cl files -print("cl_files:") -print(cl_files) - -embed_files = [f + "embed" for f in cl_files] -print("embed_files:") -print(embed_files) - -resolve_includes(embed_files, cl_files) diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp deleted file mode 100644 index 05ecdeb22..000000000 --- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Utils.h" - -#include <algorithm> -#include <fstream> -#include <iostream> -#include <utility> -#include <vector> - -using namespace arm_compute; - -const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = { - // ARMComputeEx kernels - {"activation_layer_ex", "activation_layer_ex.cl"}, - {"arg_op", "arg_operation.cl"}, - {"arithmetic_sub_ex", "arithmetic_op_ex.cl"}, - {"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"}, - {"batch_to_space_nd", "batch_to_space_nd.cl"}, - {"binary_logical_op", "binary_logical_op.cl"}, - {"cast", "cast.cl"}, - {"cast_qasymm_in", "cast.cl"}, - {"cast_qasymm_out", "cast.cl"}, - {"comparison_op", "comparison_op.cl"}, - {"comparison_op_qasymm8", "comparison_op_quantized.cl"}, - {"depth_to_space", "depth_to_space.cl"}, - {"embedding_lookup", "embedding_lookup.cl"}, - {"exp_layer", "exp.cl"}, - {"gather", "gather.cl"}, - {"gather_1d", "gather.cl"}, - {"gather_1d_out", "gather.cl"}, - {"hashtable_lookup", "hashtable_lookup.cl"}, - {"neg_tensor", "neg_tensor.cl"}, - {"pad", "pad.cl"}, - {"permute_generic", "permute_ex.cl"}, - {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"}, - {"pixelwise_div_float", "pixelwise_div_float.cl"}, - {"pixelwise_div_int", "pixelwise_div_int.cl"}, - {"prelu", "prelu.cl"}, - 
{"prelu_qasymm8", "prelu_quantized.cl"}, - {"reduce_min_max", "reduce_operation.cl"}, - {"reduce_sum_mean", "reduce_operation.cl"}, - {"squared_difference", "squared_difference.cl"}, - {"strided_slice_ex", "strided_slice_ex.cl"}, - {"topkv2_init", "topkv2.cl"}, - {"topkv2_find_first_negative", "topkv2.cl"}, - {"topkv2_reorder_negatives", "topkv2.cl"}, - {"topkv2_store", "topkv2.cl"}, - {"radixsort_histogram", "topkv2_radixsort.cl"}, - {"radixsort_scanhistograms", "topkv2_radixsort.cl"}, - {"radixsort_pastehistograms", "topkv2_radixsort.cl"}, - {"radixsort_reorder", "topkv2_radixsort.cl"}, - {"topkv2_quicksort", "topkv2_quicksort.cl"}, - {"space_to_batch_4d_nchw", "space_to_batch.cl"}, - {"space_to_batch_4d_nhwc", "space_to_batch.cl"}, - {"space_to_depth", "space_to_depth.cl"}, -}; - -const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = { -#ifdef EMBEDDED_KERNELS - { - "activation_layer_ex.cl", -#include "./cl_kernels/activation_layer_ex.clembed" - }, - { - "arg_operation.cl", -#include "./cl_kernels/arg_operation.clembed" - }, - { - "arithmetic_op_ex.cl", -#include "./cl_kernels/arithmetic_op_ex.clembed" - }, - { - "batch_to_space_nd.cl", -#include "./cl_kernels/batch_to_space_nd.clembed" - }, - { - "cast.cl", -#include "./cl_kernels/cast.clembed" - }, - { - "comparison_op.cl", -#include "./cl_kernels/comparison_op.clembed" - }, - { - "comparison_op_quantized.cl", -#include "./cl_kernels/comparison_op_quantized.clembed" - }, - { - "embedding_lookup.cl", -#include "./cl_kernels/embedding_lookup.clembed" - }, - { - "depth_to_space.cl", -#include "./cl_kernels/depth_to_space.clembed" - }, - { - "exp.cl", -#include "./cl_kernels/exp.clembed" - }, - { - "gather.cl", -#include "./cl_kernels/gather.clembed" - }, - { - "hashtable_lookup.cl", -#include "./cl_kernels/hashtable_lookup.clembed" - }, - { - "helpers.h", -#include "./cl_kernels/helpers.hembed" - }, - { - "helpers_asymm.h", -#include "./cl_kernels/helpers_asymm.hembed" - }, - { - 
"binary_logical_op.cl", -#include "./cl_kernels/binary_logical_op.clembed" - }, - { - "neg_tensor.cl", -#include "./cl_kernels/neg_tensor.clembed" - }, - { - "pad.cl", -#include "./cl_kernels/pad.clembed" - }, - { - "pixelwise_div_float.cl", -#include "./cl_kernels/pixelwise_div_float.clembed" - }, - { - "pixelwise_div_int.cl", -#include "./cl_kernels/pixelwise_div_int.clembed" - }, - { - "prelu.cl", -#include "./cl_kernels/prelu.clembed" - }, - { - "prelu_quantized.cl", -#include "./cl_kernels/prelu_quantized.clembed" - }, - { - "reduce_operation.cl", -#include "./cl_kernels/reduce_operation.clembed" - }, - { - "space_to_batch.cl", -#include "./cl_kernels/space_to_batch.clembed" - }, - { - "space_to_depth.cl", -#include "./cl_kernels/space_to_depth.clembed" - }, - { - "squared_difference.cl", -#include "./cl_kernels/squared_difference.clembed" - }, - { - "strided_slice_ex.cl", -#include "./cl_kernels/strided_slice_ex.clembed" - }, - { - "topkv2.cl", -#include "./cl_kernels/topkv2.clembed" - }, - { - "topkv2_radixsort.cl", -#include "./cl_kernels/topkv2_radixsort.clembed" - }, - { - "topkv2_quicksort.cl", -#include "./cl_kernels/topkv2_quicksort.clembed" - }, - { - "permute_ex.cl", -#include "./cl_kernels/permute_ex.clembed" - }, - -#endif /* EMBEDDED_KERNELS */ -}; - -CLKernelLibraryEx::CLKernelLibraryEx() - : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map() -{ - opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the - // CLKernelLibraryEx is built -} - -CLKernelLibraryEx &CLKernelLibraryEx::get() -{ - static CLKernelLibraryEx _kernel_library; - return _kernel_library; -} - -Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name, - const StringSet &build_options_set) const -{ - // Find which program contains the kernel - auto kernel_program_it = _kernel_program_map.find(kernel_name); - - if (_kernel_program_map.end() == kernel_program_it) - { - ARM_COMPUTE_ERROR("Kernel %s not found in 
the CLKernelLibrary", kernel_name.c_str()); - } - std::string concat_str; - - if (fp16_supported()) - { - concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 "; - } - - if (get_cl_version(_device) == CLVersion::CL20) - { - concat_str += " -cl-std=CL2.0 "; - } - else if (arm_non_uniform_workgroup_supported(_device)) - { - concat_str += " -cl-arm-non-uniform-work-group-size "; - } - else - { - ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!"); - } - - // Check if the program has been built before with same build options. - const std::string program_name = kernel_program_it->second; - const std::string build_options = stringify_set(build_options_set) + concat_str; - - const std::string built_program_name = program_name + "_" + build_options; - auto built_program_it = _built_programs_map.find(built_program_name); - - cl::Program cl_program; - - if (_built_programs_map.end() != built_program_it) - { - // If program has been built, retrieve to create kernel from it - cl_program = built_program_it->second; - } - else - { - // Get program - Program program = load_program(program_name); - - // Build program - cl_program = program.build(build_options); - - // Add built program to internal map - _built_programs_map.emplace(built_program_name, cl_program); - } - - // Create and return kernel - return Kernel(kernel_name, cl_program); -} - -void CLKernelLibraryEx::add_built_program(const std::string &built_program_name, - cl::Program program) -{ - _built_programs_map.emplace(built_program_name, program); -} - -bool CLKernelLibraryEx::fp16_supported() const { return ::fp16_supported(_device); } - -bool CLKernelLibraryEx::int64_base_atomics_supported() const -{ - return device_supports_extension(_device, "cl_khr_int64_base_atomics"); -} - -const Program &CLKernelLibraryEx::load_program(const std::string &program_name) const -{ - const auto program_it = _programs_map.find(program_name); - - if (program_it != _programs_map.end()) - { - return program_it->second; - } 
- - Program program; - -#ifdef EMBEDDED_KERNELS - const auto program_source_it = _program_source_map.find(program_name); - - if (_program_source_map.end() == program_source_it) - { - ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); - } - - program = Program(_context, program_name, program_source_it->second); -#else /* EMBEDDED_KERNELS */ - // Check for binary - std::string source_name = _kernel_path + program_name; - std::string binary_name = source_name + "bin"; - - if (std::ifstream(binary_name).is_open()) - { - const std::string program_binary = read_file(binary_name, true); - program = Program(_context, _device, program_name, - std::vector<unsigned char>(program_binary.begin(), program_binary.end())); - } - else if (std::ifstream(source_name).is_open()) - { - program = Program(_context, program_name, read_file(source_name, false)); - } - else - { - ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str()); - } -#endif /* EMBEDDED_KERNELS */ - - // Insert program to program map - const auto new_program = _programs_map.emplace(program_name, std::move(program)); - - return new_program.first->second; -} - -std::string CLKernelLibraryEx::stringify_set(const StringSet &s) const -{ - std::string concat_set; - -#ifndef EMBEDDED_KERNELS - concat_set += "-I" + _kernel_path + " "; -#endif /* EMBEDDED_KERNELS */ - - // Concatenate set - for (const auto &el : s) - { - concat_set += " " + el; - } - - return concat_set; -} - -std::string CLKernelLibraryEx::get_program_source(const std::string &program_name) -{ - const auto program_source_it = _program_source_map.find(program_name); - - if (program_source_it == _program_source_map.end()) - { - ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); - } - - return program_source_it->second; -} - -size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) const -{ - size_t result; - - size_t err = kernel.getWorkGroupInfo(_device, 
CL_KERNEL_WORK_GROUP_SIZE, &result); - ARM_COMPUTE_ERROR_ON_MSG( - err != 0, - "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel"); - ARM_COMPUTE_UNUSED(err); - - return result; -} - -cl::NDRange CLKernelLibraryEx::default_ndrange() const -{ - // GPUTarget _target = get_target_from_device(_device); - cl::Device device = cl::Device::getDefault(); - GPUTarget _target = get_target_from_device(device); - cl::NDRange default_range; - - switch (_target) - { - case GPUTarget::MIDGARD: - case GPUTarget::T600: - case GPUTarget::T700: - case GPUTarget::T800: - default_range = cl::NDRange(128u, 1); - break; - default: - default_range = cl::NullRange; - } - - return default_range; -} - -std::string CLKernelLibraryEx::get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); } diff --git a/libs/ARMComputeEx/src/core/CL/OpenCLEx.cpp b/libs/ARMComputeEx/src/core/CL/OpenCLEx.cpp deleted file mode 100644 index cbda169fb..000000000 --- a/libs/ARMComputeEx/src/core/CL/OpenCLEx.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/OpenCLEx.h" - -#include <dlfcn.h> -#include <iostream> - -namespace arm_compute -{ -CLSymbolsEx &CLSymbolsEx::get() -{ - static CLSymbolsEx symbols; - return symbols; -} - -bool CLSymbolsEx::load_default() -{ - static const std::vector<std::string> libraries{"libOpenCL.so", "libGLES_mali.so", "libmali.so"}; - - if (_loaded.first) - { - return _loaded.second; - } - - // Indicate that default loading has been tried - _loaded.first = true; - - for (const auto &lib : libraries) - { - if (load(lib)) - { - return true; - } - } - - std::cerr << "Couldn't find any OpenCL library.\n"; - return false; -} - -bool CLSymbolsEx::load(const std::string &library) -{ - void *handle = dlopen(library.c_str(), RTLD_LAZY | RTLD_LOCAL); - - if (handle == nullptr) - { - std::cerr << "Can't load " << library << ": " << dlerror() << "\n"; - // Set status of loading to failed - _loaded.second = false; - return false; - } - -#define LOAD_FUNCTION_PTR(func_name, handle) \ - func_name##_ptr = reinterpret_cast<decltype(func_name) *>(dlsym(handle, #func_name)); - - LOAD_FUNCTION_PTR(clGetEventInfo, handle); - LOAD_FUNCTION_PTR(clSetEventCallback, handle); - -#undef LOAD_FUNCTION_PTR - - // Don't call dlclose(handle) or all the symbols will be unloaded ! 
- - // Disable default loading and set status to successful - _loaded = std::make_pair(true, true); - - return true; -} - -} // namespace arm_compute - -cl_int clGetEventInfo(cl_event event, cl_event_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) -{ - arm_compute::CLSymbolsEx::get().load_default(); - auto func = arm_compute::CLSymbolsEx::get().clGetEventInfo_ptr; - if (func != nullptr) - { - return func(event, param_name, param_value_size, param_value, param_value_size_ret); - } - else - { - return CL_OUT_OF_RESOURCES; - } -} - -cl_int clSetEventCallback(cl_event event, cl_int command_exec_callback_type, - void(CL_CALLBACK *pfn_ev_notify)(cl_event ev, cl_int ev_cmd_exec_status, - void *user_data), - void *user_data) -{ - arm_compute::CLSymbolsEx::get().load_default(); - auto func = arm_compute::CLSymbolsEx::get().clSetEventCallback_ptr; - if (func != nullptr) - { - return func(event, command_exec_callback_type, pfn_ev_notify, user_data); - } - else - { - return CL_OUT_OF_RESOURCES; - } -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/activation_layer_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/activation_layer_ex.cl deleted file mode 100644 index f54c7bde3..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/activation_layer_ex.cl +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - -#define CONST_ONE 1.f -#define DIV_OP(a, b) ((a) / (b)) -#define RSQRT_OP(a) DIV_OP(CONST_ONE, sqrt((a))) - -// Inverse Square-root Activation -inline TYPE rsqrt_op(TYPE x) -{ - return RSQRT_OP(x); -} - -#define ACTIVATION_OP2(op, x) op##_op(x) -#define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x) - -#if defined(ACT) - -/** This performs an activation function floating point inputs. - * - * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short - * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH - * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively. - * - * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void activation_layer_ex( - TENSOR3D_DECLARATION(input) -#ifndef IN_PLACE - , - TENSOR3D_DECLARATION(output) -#endif /* not IN_PLACE */ -) -{ - // Get pixels pointer - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); -#ifdef IN_PLACE - Tensor3D output = input; -#else /* IN_PLACE */ - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); -#endif /* IN_PLACE */ - - // Load data - TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr); - - // Perform activation - data = ACTIVATION_OP(ACT, data); - - // Store result - VSTORE(VEC_SIZE) - (data, 0, (__global DATA_TYPE *)output.ptr); -} - -#endif /* defined(ACT) */ diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl deleted file mode 100644 index 9a6921d7c..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) -/** Perform arg_max/arg_min - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16 - * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using - * -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] input_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: U32 - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] axis Axis through which reduction occurs for max value index - * @param[in] dim Dimension across the axis to be reduced. 
- */ - -__kernel void arg_op(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - const int axis, - const int dim) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int indices[4] = - { - get_global_id(0), - get_global_id(1), - get_global_id(2) % DEPTH_OUT, - get_global_id(2) / DEPTH_OUT, - }; - - DATA_TYPE value = *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])); - DATA_TYPE tval = value; - int idx = 0; - for(int i = 1; i < dim; ++i) - { - indices[axis] = i; - - #if OP_CODE == 1 // ArgMax - value = max(value, *((__global DATA_TYPE *) - tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]))); - #elif OP_CODE == 2 //ArgMin - value = min(value, *((__global DATA_TYPE *) - tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]))); - #else - return; - - #endif - - if(tval!=value) - { - idx = indices[axis]; - tval = value; - } - } - - *((__global uint *)out.ptr) = idx; -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_ex.cl deleted file mode 100644 index 2ed698951..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_ex.cl +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifdef SATURATE -#define SUB(x, y) sub_sat((x), (y)) -#else /* SATURATE */ -#define SUB(x, y) (x) - (y) -#endif /* SATURATE */ - -/** This function subtracts one tensors from another. - * - * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=short - * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used. - * - * @param[in] in1_ptr Pointer to the source tensor. Supported data types: U8, S16 - * @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[in] in2_ptr Pointer to the source tensor. 
Supported data types: U8, S16 - * @param[in] in2_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8, S16 - * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] out_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void arithmetic_sub_ex( - TENSOR3D_DECLARATION(in1), - TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out)) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load values - VEC_DATA_TYPE(DATA_TYPE_OUT, 16) - in_a = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, 16)); - VEC_DATA_TYPE(DATA_TYPE_OUT, 16) - in_b = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), 
VEC_DATA_TYPE(DATA_TYPE_OUT, 16)); - - // Calculate and store result - vstore16(SUB(in_a, in_b), 0, (__global DATA_TYPE_OUT *)out.ptr); -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl deleted file mode 100644 index 5cd0a4309..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers_asymm.h" - -#ifdef SATURATE -#define ADD(x, y) add_sat((x), (y)) -#define SUB(x, y) sub_sat((x), (y)) -#else /* SATURATE */ -#define ADD(x, y) (x) + (y) -#define SUB(x, y) (x) - (y) -#endif /* SATURATE */ - -/** Performs a pixelwise addition used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 - * - * The following computations will be performed: - * - * -# Add offset terms to inputs - -# Get scaled value of two inputs - * -# Add inputs - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Shift the int32 accumulator by result_shift - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. 
- * - * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar - * @attention The number of bits to shift left of input tensors must be passed at compile time using -DLEFT_SHIFT - * @attention The offset, scalar scale factor and number of bits to shift right of input tensors must be passed at compile time using -DIN1_OFFSET, -RIN1_MULT_INT, -DIN1_SHIFT, -DIN2_OFFSET, -RIN2_MULT_INT and -DIN2_SHIFT - * @attention The offset, scalar scale factor and number of bits to shift right of output tensor must be passed at compile time using -DRESULT_OFFSET, -RESULT_MULT_INT and -DRESULT_SHIFT - * - * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar - * @attention The inputs and output scale information of qasymm8 need to be passed at compile time using -DSCALE_IN1, -DSCALE_IN2 and -DSCALE_OUT: - * e.g. -DSCALE_IN1=1.f -DSCALE_IN2=1.f -DSCALE_OUT=2.f - * @attention The inputs and output scale offset need to be passed at compile time using -DOFFSET_IN1, -DOFFSET_IN2 and -DOFFSET_OUT: - * e.g. -DOFFSET_IN1=0 -DOFFSET_IN2=0 -DOFFSET_OUT=0 - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used. - * - * @param[in] in1_ptr Pointer to the source tensor. 
Supported data types: QASYMM8 - * @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[in] in2_ptr Pointer to the source tensor. Supported data types: QASYMM8 - * @param[in] in2_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] out_ptr Pointer to the destination tensor. 
Supported data types: QASYMM8 - * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] out_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void arithmetic_add_qasymm8( - TENSOR3D_DECLARATION(in1), - TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out)) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load data - VEC_DATA_TYPE(int, 16) - in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(int, 16)); - VEC_DATA_TYPE(int, 16) - in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(int, 16)); - - // Get scaled value of two inputs - VEC_DATA_TYPE(int, 16) in1_val = in1_data + (VEC_DATA_TYPE(int, 16))(IN1_OFFSET); - VEC_DATA_TYPE(int, 16) in2_val = in2_data + (VEC_DATA_TYPE(int, 16))(IN2_OFFSET); - - VEC_DATA_TYPE(int, 16) left_shift = (VEC_DATA_TYPE(int, 16))1 << (VEC_DATA_TYPE(int, 16))(LEFT_SHIFT); - VEC_DATA_TYPE(int, 16) shifted_in1_val = in1_val * left_shift; - VEC_DATA_TYPE(int, 16) shifted_in2_val = in2_val * left_shift; - - VEC_DATA_TYPE(int, 16) scaled_in1_val = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in1_val, IN1_MULT_INT, IN1_SHIFT, 16); - VEC_DATA_TYPE(int, 16) scaled_in2_val = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in2_val, IN2_MULT_INT, IN2_SHIFT, 16); - - // Add inputs and 
multiply with a multiplier smaller than 1 - VEC_DATA_TYPE(int, 16) sum_val = scaled_in1_val + scaled_in2_val; - VEC_DATA_TYPE(int, 16) out_val = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(sum_val, RESULT_MULT_INT, RESULT_SHIFT, 16); - out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET); - - VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16)); - -// TODO: Apply min-max BOUND to support fuse with relu. -/* -#if defined(MIN_BOUND) - res = max(res, (uchar16)MIN_BOUND); -#endif // defined(MIN_BOUND) -#if defined(MAX_BOUND) - res = min(res, (uchar16)MAX_BOUND); -#endif // defined(MAX_BOUND) -*/ - - // Store result - VSTORE(16)(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), - 0, (__global DATA_TYPE_OUT *)out.ptr); -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl deleted file mode 100644 index ad6a48a02..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE0) && defined(BLOCK_SIZE1) && defined(BATCH_OUT) -/** Perform batch to space rearrangement of tensor - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. 
-DDATA_TYPE=float - * @attention Output tensor batch should be given as a preprocessor argument using -DBATCH_OUT=size. e.g. -DBATCH_OUT=16 - * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE0=size. e.g. -DBLOCK_SIZE0=1 - * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: same as @p inpu -t_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in -bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void batch_to_space_nd( - TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output)) - { - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int out_index[4]={0}; - int in_index[4]={0}; - - out_index[0] = get_global_id(0);//W - out_index[1] = get_global_id(1);//H - out_index[2] = get_global_id(2) % DEPTH_OUT;//C - out_index[3] = get_global_id(2) / DEPTH_OUT;//N - - in_index[0] = out_index[0]/BLOCK_SIZE1; - in_index[1] = out_index[1]/BLOCK_SIZE0; - in_index[2] = out_index[2]; - in_index[3] = out_index[3] + ((out_index[1] % BLOCK_SIZE0) * BLOCK_SIZE0 + out_index[0] % BLOCK_SIZE1) * BATCH_OUT; - - *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_index[0], in_index[1], in_index[2], in_index[3])); - } -#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE0) && defined(BLOCK_SIZE1) && defined(BATCH_OUT) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl 
b/libs/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl deleted file mode 100644 index bea61f53e..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(OP_CODE) && defined(DATA_TYPE) -/** returns truth value of the two input tensors for BINARY LOGICAL OP. - * where BINARY LOGICAL OP can be AND, OR. - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=uchar - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using - * -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input1_ptr Pointer to the source tensor. 
Supported data types: QASYMM8 - * @param[in] input1_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[in] input2_ptr Pointer to the source tensor.Supported data types: QASYMM8 - * @param[in] input2_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input2_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: QASYMM8 - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - */ -__kernel void binary_logical_op( - TENSOR3D_DECLARATION(input1), - TENSOR3D_DECLARATION(input2), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input1 = CONVERT_TO_TENSOR3D_STRUCT(input1); - Tensor3D input2 = CONVERT_TO_TENSOR3D_STRUCT(input2); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - #if OP_CODE == 1 // LOGICAL AND - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE) - (0, (__global DATA_TYPE *)input1.ptr) && VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, (__global DATA_TYPE *)output.ptr); - - #elif OP_CODE == 2 // LOGICAL OR - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE) - (0, (__global DATA_TYPE *)input1.ptr) || VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, (__global DATA_TYPE *)output.ptr); - - #else // OP NOT SUPPORTED - return - - #endif -} -#endif //if defined(OP_CODE) && defined(DATA_TYPE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/cast.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/cast.cl deleted file mode 100644 index 3d4675e5d..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/cast.cl +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef SCALE -#define SCALE 1.0f -#endif -#ifndef OFFSET -#define OFFSET 0 -#endif -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) -/** Perform a cast operation on an input tensor. - * - * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * - * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void cast( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VSTORE(VEC_SIZE)(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr), - VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), - 0, (__global DATA_TYPE_OUT *)output.ptr); -} - -/** Perform a cast operation on an QASYMM8 input tensor. - * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int - * @attention Offset and Scale of input should be given as a preprocessor argument using -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5 - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * - * @param[in] input_ptr Pointer to the source image. 
Supported data types: F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void cast_qasymm_in( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) in_data = - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr); - VEC_DATA_TYPE(int, VEC_SIZE) offset = (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET); - VEC_DATA_TYPE(float, VEC_SIZE) scale = (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE); - - 
VEC_DATA_TYPE(int, VEC_SIZE) tmp = CONVERT(in_data, VEC_DATA_TYPE(int, VEC_SIZE)) - offset; - VEC_DATA_TYPE(float, VEC_SIZE) out_data = CONVERT(tmp, VEC_DATA_TYPE(float, VEC_SIZE)) * scale; - - VSTORE(VEC_SIZE)(CONVERT(out_data, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), - 0, (__global DATA_TYPE_OUT *)output.ptr); -} - - -/** Perform a cast operation on an QASYMM8 output tensor. - * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int - * @attention Offset and Scale of output should be given as a preprocessor argument using -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5 - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * - * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: U8 - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void cast_qasymm_out( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) in_data = - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr); - VEC_DATA_TYPE(int, VEC_SIZE) offset = (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET); - VEC_DATA_TYPE(float, VEC_SIZE) scale = (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE); - - VEC_DATA_TYPE(float, VEC_SIZE) tmp = CONVERT(in_data, VEC_DATA_TYPE(float, VEC_SIZE)) / scale; - VEC_DATA_TYPE(float, VEC_SIZE) out_data = tmp + CONVERT(offset, VEC_DATA_TYPE(float, VEC_SIZE)); - - VSTORE(VEC_SIZE)(CONVERT(out_data, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), - 0, (__global DATA_TYPE_OUT *)output.ptr); -} -#endif // defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op.cl deleted file mode 100644 index 765072556..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op.cl +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) && defined(OP_CODE) -/** Returns truth value of comparison operators. - * Comparison operators may be equal, not_equal etc. - * - * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN, -DDATA_TYPE_OUT, - * e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT = uchar - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using - * -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input1_ptr Pointer to the source tensor. 
Supported data types: U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] input1_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[in] input2_ptr Pointer to the source tensor. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] input2_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input2_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: QASYMM8 - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void comparison_op( - TENSOR3D_DECLARATION(input1), - TENSOR3D_DECLARATION(input2), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input1 = CONVERT_TO_TENSOR3D_STRUCT(input1); - Tensor3D input2 = CONVERT_TO_TENSOR3D_STRUCT(input2); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - #if OP_CODE == 1 //EQUAL - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE) - (0, (__global DATA_TYPE_IN *)input1.ptr) == VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input2.ptr), - VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)),0, (__global DATA_TYPE_OUT *)output.ptr); - - #elif OP_CODE == 2 //NOT_EQUAL - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE) - (0, (__global DATA_TYPE_IN *)input1.ptr) != VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input2.ptr), - VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr); - - #else // OP NOT SUPPORTED - return; - - #endif -} -#endif // defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) && defined(OP_CODE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl deleted file mode 100644 index 1eb305f7b..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl +++ /dev/null @@ -1,93 
+0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" -#define SUB(x, y) (x) - (y) - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(DATA_TYPE_OUT) - -#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE) -#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE) -#define VEC_OUT VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE) - -/** Returns the truth value of comparison . - * @attention Offset and Scale of both input should be given as a preprocessor argument using -DOFFSET_IN1=int, -DOFFSET_IN2=int, -DSCALE_IN1=float and -DSCALE_IN2=float. e.g. -DOFFSET_IN1=1, -DOFFSET_IN2=0, -DSCALE_IN1=0.5, -DSCALE_IN2=0.5 - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using - * -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input1_ptr Pointer to the source tensor. 
Supported data types: QASYMM8 - * @param[in] input1_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[in] input2_ptr Pointer to the source tensor. Supported data types: QASYMM8 - * @param[in] input2_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input2_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: QASYMM8 - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void comparison_op_qasymm8( - TENSOR3D_DECLARATION(in1), - TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out)) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in1.ptr), VEC_INT); - VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in2.ptr), VEC_INT); - - in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1)); - in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2)); - - const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1); - const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2); - - #if OPCODE == 1 //EQUAL QUANTIZED - VSTORE(VEC_SIZE)(CONVERT(in1f32 == in2f32, VEC_OUT), 0, (__global DATA_TYPE_OUT *)out.ptr); - - #elif OPCODE == 2 //NOT EQUAL QUANTIZED - VSTORE(VEC_SIZE)(CONVERT(in1f32 != in2f32, VEC_OUT), 0, (__global DATA_TYPE_OUT *)out.ptr); - - #else // OP NOT SUPPORTED - return; - - #endif -} -#endif // defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(DATA_TYPE_OUT) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl 
b/libs/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl deleted file mode 100644 index fef2243e7..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) -/** Perform space to depth rearrangement of tensor - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16 - * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g. -DBLOCK_SIZE=1 - * - * @param[in] input_ptr Pointer to the source image. 
Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p inpu -t_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in -bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void depth_to_space( - TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output)) - { - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int out_index[4]={0}; - int in_index[4]={0}; - 
- out_index[0] = get_global_id(0);//W - out_index[1] = get_global_id(1);//H - out_index[2] = get_global_id(2) % DEPTH_OUT;//C - out_index[3] = get_global_id(2) / DEPTH_OUT;//B - - in_index[0] = out_index[0]/BLOCK_SIZE; - in_index[1] = out_index[1]/BLOCK_SIZE; - in_index[2] = out_index[2] + ((out_index[1] % BLOCK_SIZE) * BLOCK_SIZE + out_index[0] % BLOCK_SIZE) * DEPTH_OUT; - in_index[3] = out_index[3]; - - *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_index[0], in_index[1], in_index[2],in_index[3])); - } -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl deleted file mode 100644 index 348458fe9..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS) -/** Perform embedding_lookup of input tensor - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. 
-DVEC_SIZE=16 - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=depth. e.g. -DDEPTH_OUT=16 - * @attention Number of input dimensions are passed as a preprocessor argument using -DNUM_DIMS=size, e.g. -DNUM_DIMS=4 - * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] input_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] lookups_ptr Pointer to the lookups vector. Supported data types: S32 - * @param[in] lookups_stride_x Stride of the lookups vector in X dimension (in bytes) - * @param[in] lookups_step_x lookups_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] lookups_offset_first_element_in_bytes The offset of the first element in the lookups vector - */ - -__kernel void embedding_lookup(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - VECTOR_DECLARATION(lookups)) -{ - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, DEPTH_OUT); - - Vector lups = CONVERT_TO_VECTOR_STRUCT_NO_STEP(lookups); - - //lookup ids for based on the tensor dimensions - int lup_id[4] = {0}; - - lup_id[0] = (NUM_DIMS == 1)?*((__global int *)vector_offset(&lups,get_global_id(0))) - :get_global_id(0); - lup_id[1] = (NUM_DIMS == 2)?*((__global int *)vector_offset(&lups,get_global_id(1))) - :get_global_id(1); - lup_id[2] = (NUM_DIMS == 3)?*((__global int 
*)vector_offset(&lups,get_global_id(2))) - :get_global_id(2)%DEPTH_OUT; - lup_id[3] = (NUM_DIMS == 4)?*((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT)) - :get_global_id(2) / DEPTH_OUT; - - in.ptr += input_offset_first_element_in_bytes + lup_id[0] * input_step_x + lup_id[1] * input_step_y - + lup_id[2] * input_step_z + lup_id[3] * input_step_w; - - VSTORE(VEC_SIZE)(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), - 0, (__global DATA_TYPE *)out.ptr); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/exp.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/exp.cl deleted file mode 100644 index 69d94f30a..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/exp.cl +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) -/** Perform an exponential operation on an input tensor. - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @note Can only take floating point data types. 
- * - * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void exp_layer( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VSTORE(VEC_SIZE) - (exp(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr)), 0, (__global DATA_TYPE *)output.ptr); -} -#endif // defined(DATA_TYPE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/gather.cl 
b/libs/ARMComputeEx/src/core/CL/cl_kernels/gather.cl deleted file mode 100644 index 6b767d6c9..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/gather.cl +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -/** Perform gather - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short - * - * @param[in] input1_ptr Pointer to the first source tensor. Supported data types: U8/S32/F32 - * @param[in] input1_stride_x Stride of the first source tensor in X dimension (in bytes) - * @param[in] input1_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the first source tensor in Y dimension (in bytes) - * @param[in] input1_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the first source tensor in Z dimension (in bytes) - * @param[in] input1_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the first source tensor - * @param[in] input2_ptr Pointer to the first source tensor. 
Supported data types: U32 - * @param[in] input2_stride_x Stride of the first source tensor in X dimension (in bytes) - * @param[in] input2_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the first source tensor - * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void gather(IMAGE_DECLARATION(input1), - VECTOR_DECLARATION(input2), - IMAGE_DECLARATION(output)) -{ - Image in1 = CONVERT_TO_IMAGE_STRUCT_NO_STEP(input1); - Vector in2 = CONVERT_TO_VECTOR_STRUCT(input2); - Image out = CONVERT_TO_IMAGE_STRUCT_NO_STEP(output); - - VEC_DATA_TYPE(DATA_TYPE_IN2, 2) - in2_data = CONVERT(vload2(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_IN2, 2)); - - //TODO: performance tuning for memcopy - int index = in2_data.s0; - int stride=input1_stride_y/input1_stride_x; - - for(int i=0; i<stride; i++){ - *((__global DATA_TYPE_OUT *)offset(&out, i,get_global_id(0)))=*((__global DATA_TYPE_IN1 *)offset(&in1, i,index)); - } -} - -__kernel void gather_1d_out(IMAGE_DECLARATION(input1), - VECTOR_DECLARATION(input2), - VECTOR_DECLARATION(output)) -{ - Image in1 = CONVERT_TO_IMAGE_STRUCT_NO_STEP(input1); 
- Vector in2 = CONVERT_TO_VECTOR_STRUCT(input2); - Vector out = CONVERT_TO_VECTOR_STRUCT_NO_STEP(output); - - VEC_DATA_TYPE(DATA_TYPE_IN2, 2) - in2_data = CONVERT(vload2(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_IN2, 2)); - - //TODO: performance tuning for memcopy - int index = in2_data.s0; - int stride=input1_stride_y/input1_stride_x; - - for(int i=0; i<stride; i++){ - *((__global DATA_TYPE_OUT *)vector_offset(&out, i+get_global_id(0)))=*((__global DATA_TYPE_IN1 *)offset(&in1, i, index)); - } -} - -__kernel void gather_1d(VECTOR_DECLARATION(input1), - VECTOR_DECLARATION(input2), - VECTOR_DECLARATION(output)) -{ - Vector in1 = CONVERT_TO_VECTOR_STRUCT_NO_STEP(input1); - Vector in2 = CONVERT_TO_VECTOR_STRUCT(input2); - Vector out = CONVERT_TO_VECTOR_STRUCT_NO_STEP(output); - - VEC_DATA_TYPE(DATA_TYPE_IN2, 2) - in2_data = CONVERT(vload2(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_IN2, 2)); - - //TODO: performance tuning for memcopy - int index = in2_data.s0; - *((__global DATA_TYPE_OUT *)vector_offset(&out, get_global_id(0)))=*((__global DATA_TYPE_IN1 *)vector_offset(&in1, index)); -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl deleted file mode 100644 index ed7409852..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS) -/** Perform hashtable_lookup of input tensor - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=depth. e.g. -DDEPTH_OUT=16 - * @attention Number of input dimensions are passed as a preprocessor argument using -DNUM_DIMS=size, e.g. -DNUM_DIMS=4 - * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] input_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] lookups_ptr Pointer to the lookups vector. Supported data types: S32 - * @param[in] lookups_stride_x Stride of the lookups vector in X dimension (in bytes) - * @param[in] lookups_step_x lookups_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] lookups_offset_first_element_in_bytes The offset of the first element in the lookups vector - */ -__kernel void hashtable_lookup(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - VECTOR_DECLARATION(lookups)) -{ - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, DEPTH_OUT); - - Vector lups = CONVERT_TO_VECTOR_STRUCT_NO_STEP(lookups); - - int lup_id[4] = {0}; - - lup_id[0] = (NUM_DIMS == 1)?*((__global int *)vector_offset(&lups,get_global_id(0))) - :get_global_id(0); - lup_id[1] = (NUM_DIMS == 2)?*((__global int *)vector_offset(&lups,get_global_id(1))) - :get_global_id(1); - lup_id[2] = (NUM_DIMS == 3)?*((__global int *)vector_offset(&lups,get_global_id(2))) - 
:get_global_id(2)%DEPTH_OUT; - lup_id[3] = (NUM_DIMS == 4)?*((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT)) - :get_global_id(2) / DEPTH_OUT; - - if (lup_id[NUM_DIMS-1] < 0) - { - VSTORE(VEC_SIZE)((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0, 0, (__global DATA_TYPE *)out.ptr); - return; - } - - in.ptr += input_offset_first_element_in_bytes + lup_id[0] * input_step_x + lup_id[1] * input_step_y - + lup_id[2] * input_step_z + lup_id[3] * input_step_w; - - VSTORE(VEC_SIZE)(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), - 0, (__global DATA_TYPE *)out.ptr); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers.h deleted file mode 100644 index 0e123ae0a..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers.h +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_HELPER_H -#define ARM_COMPUTE_HELPER_H - -#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16) -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16) - -#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable -#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8) - -#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \ - defined(cl_arm_integer_dot_product_accumulate_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable -#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && - // defined(cl_arm_integer_dot_product_accumulate_int8) - -#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf) -#pragma OPENCL EXTENSION cl_arm_printf : enable -#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf) - -#define EXPAND(x) x - -#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val) - -#define VLOAD_STR(size) vload##size -#define VLOAD(size) VLOAD_STR(size) - -#define VSTORE_STR(size) vstore##size -#define VSTORE(size) VSTORE_STR(size) - -#define VEC_DATA_TYPE_STR(type, size) type##size -#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size) - -#define CL_VEC_DATA_TYPE_STR(type, size) type##size -#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size) - -#define CONVERT_STR(x, type) (convert_##type((x))) -#define CONVERT(x, type) CONVERT_STR(x, type) - -#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x))) -#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type) - 
-#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x))) -#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round) - -#define VECTOR_DECLARATION(name) \ - __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \ - uint name##_offset_first_element_in_bytes - -#define IMAGE_DECLARATION(name) \ - __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_offset_first_element_in_bytes - -#define TENSOR3D_DECLARATION(name) \ - __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_stride_z, uint name##_step_z, \ - uint name##_offset_first_element_in_bytes - -#define TENSOR4D_DECLARATION(name) \ - __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \ - uint name##_step_w, uint name##_offset_first_element_in_bytes - -#define CONVERT_TO_VECTOR_STRUCT(name) \ - update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - name##_step_x) - -#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \ - update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0) - -#define CONVERT_TO_IMAGE_STRUCT(name) \ - update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - name##_step_x, name##_stride_y, name##_step_y) - -#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \ - update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, \ - name##_stride_y, 0) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ - update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \ - name##_stride_x, name##_step_x, name##_stride_y, \ - name##_step_y, name##_stride_z, name##_step_z) - -#define 
CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \ - update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \ - name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, \ - name##_step_z) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ - update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \ - name##_stride_x, name##_step_x, name##_stride_y, \ - name##_step_y, name##_stride_z, name##_step_z) - -#define CONVERT_TO_TENSOR3D_STRUCT(name) \ - update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \ - name##_step_z) - -#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \ - update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - 0, name##_stride_y, 0, name##_stride_z, 0) - -#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \ - update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \ - name##_step_z, name##_stride_w, name##_step_w, mod_size) - -#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \ - update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ - 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, \ - mod_size) - -/** Structure to hold Vector information */ -typedef struct Vector -{ - __global uchar *ptr; /**< Pointer to the starting postion of the buffer */ - int offset_first_element_in_bytes; /**< The offset of the first element in the source image */ - int stride_x; /**< Stride of the image in X dimension (in bytes) */ -} Vector; - -/** Structure to hold Image information */ -typedef struct Image -{ - __global uchar *ptr; /**< Pointer to the starting postion of the buffer */ - int offset_first_element_in_bytes; /**< The offset of the first element in the source image */ - 
int stride_x; /**< Stride of the image in X dimension (in bytes) */ - int stride_y; /**< Stride of the image in Y dimension (in bytes) */ -} Image; - -/** Structure to hold 3D tensor information */ -typedef struct Tensor3D -{ - __global uchar *ptr; /**< Pointer to the starting postion of the buffer */ - int offset_first_element_in_bytes; /**< The offset of the first element in the source image */ - int stride_x; /**< Stride of the image in X dimension (in bytes) */ - int stride_y; /**< Stride of the image in Y dimension (in bytes) */ - int stride_z; /**< Stride of the image in Z dimension (in bytes) */ -} Tensor3D; - -/** Structure to hold 4D tensor information */ -typedef struct Tensor4D -{ - __global uchar *ptr; /**< Pointer to the starting postion of the buffer */ - int offset_first_element_in_bytes; /**< The offset of the first element in the source image */ - int stride_x; /**< Stride of the image in X dimension (in bytes) */ - int stride_y; /**< Stride of the image in Y dimension (in bytes) */ - int stride_z; /**< Stride of the image in Z dimension (in bytes) */ - int stride_w; /**< Stride of the image in W dimension (in bytes) */ -} Tensor4D; - -/** Wrap vector information into an Vector structure, and make the pointer point at this workitem's - * data. 
- * - * @param[in] ptr Pointer to the starting postion of the buffer - * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector - * @param[in] stride_x Stride of the vector in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per - * workitem(in bytes) - * - * @return An image object - */ -inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, - uint stride_x, uint step_x) -{ - Vector vector = { - .ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - }; - vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x; - return vector; -} - -/** Wrap image information into an Image structure, and make the pointer point at this workitem's - * data. - * - * @param[in] ptr Pointer to the starting postion of the buffer - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per - * workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per - * workitem(in bytes) - * - * @return An image object - */ -inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes, - uint stride_x, uint step_x, uint stride_y, uint step_y) -{ - Image img = {.ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - .stride_y = stride_y}; - img.ptr += - img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y; - return img; -} - -/** Wrap 3D tensor information into an image structure, and make the pointer point at this - * workitem's data. 
- * - * @param[in] ptr Pointer to the starting postion of the buffer - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per - * workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per - * workitem(in bytes) - * @param[in] stride_z Stride of the image in Z dimension (in bytes) - * @param[in] step_z stride_z * number of elements along Z processed per - * workitem(in bytes) - * - * @return A 3D tensor object - */ -inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr, - uint offset_first_element_in_bytes, - uint stride_x, uint step_x, uint stride_y, - uint step_y, uint stride_z, uint step_z) -{ - Image img = {.ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - .stride_y = stride_y}; - img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x + - get_global_id(1) * step_y + get_global_id(2) * step_z; - return img; -} - -/** Wrap 3D tensor information into an tensor structure, and make the pointer point at this - * workitem's data. 
- * - * @param[in] ptr Pointer to the starting postion of the buffer - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per - * workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per - * workitem(in bytes) - * @param[in] stride_z Stride of the image in Z dimension (in bytes) - * @param[in] step_z stride_z * number of elements along Z processed per - * workitem(in bytes) - * - * @return A 3D tensor object - */ -inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr, - uint offset_first_element_in_bytes, uint stride_x, - uint step_x, uint stride_y, uint step_y, uint stride_z, - uint step_z) -{ - Tensor3D tensor = {.ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - .stride_y = stride_y, - .stride_z = stride_z}; - tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + - get_global_id(1) * step_y + get_global_id(2) * step_z; - return tensor; -} - -inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr, - uint offset_first_element_in_bytes, uint stride_x, - uint step_x, uint stride_y, uint step_y, uint stride_z, - uint step_z, uint stride_w, uint step_w, uint mod_size) -{ - Tensor4D tensor = {.ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, - .stride_y = stride_y, - .stride_z = stride_z, - .stride_w = stride_w}; - - tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x + - get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z + - (get_global_id(2) / mod_size) * step_w; - return tensor; -} - -/** Get the pointer position of a Vector - * - * @param[in] vec Pointer to the starting position of the 
buffer - * @param[in] x Relative X position - */ -inline __global const uchar *vector_offset(const Vector *vec, int x) -{ - return vec->ptr + x * vec->stride_x; -} - -/** Get the pointer position of a Image - * - * @param[in] img Pointer to the starting position of the buffer - * @param[in] x Relative X position - * @param[in] y Relative Y position - */ -inline __global uchar *offset(const Image *img, int x, int y) -{ - return img->ptr + x * img->stride_x + y * img->stride_y; -} - -/** Get the pointer position of a Tensor3D - * - * @param[in] tensor Pointer to the starting position of the buffer - * @param[in] x Relative X position - * @param[in] y Relative Y position - * @param[in] z Relative Z position - */ -inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z) -{ - return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z; -} - -/** Get the pointer position of a Tensor4D - * - * @param[in] tensor Pointer to the starting position of the buffer - * @param[in] x Relative X position - * @param[in] y Relative Y position - * @param[in] z Relative Z position - * @param[in] w Relative W position - */ -inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w) -{ - return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z + - w * tensor->stride_w; -} - -#endif // _HELPER_H diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h deleted file mode 100644 index c39138caa..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_HELPERS_ASYMM_H -#define ARM_COMPUTE_HELPERS_ASYMM_H - -#include "helpers.h" - -/** Correctly-rounded-to-nearest division by a power-of-two. - * - * @param[in] size Size of vector. - * - * @return Correctly-rounded-to-nearest division by a power-of-two. 
- */ -#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_rounding_divide_by_POW2_##size(VEC_DATA_TYPE(int, size) x, int exponent) \ - { \ - VEC_DATA_TYPE(int, size) \ - mask = (1 << exponent) - 1; \ - const VEC_DATA_TYPE(int, size) zero = 0; \ - const VEC_DATA_TYPE(int, size) one = 1; \ - VEC_DATA_TYPE(int, size) \ - threshold = (mask >> 1) + select(zero, one, x < 0); \ - return (x >> exponent) + select(zero, one, (x & mask) > threshold); \ - } - -/** Product of two numbers, interpreting them as fixed-point values in the interval [-1, 1), - * rounding to the nearest value, and saturating -1 * -1 to the maximum value. - * - * @param[in] size Size of vector. - * - * @return Product of two fixed-point numbers. - */ -#define ASYMM_MULT_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \ - { \ - VEC_DATA_TYPE(int, size) \ - overflow = a == b && a == INT_MIN; \ - VEC_DATA_TYPE(long, size) \ - a_64 = convert_long##size(a); \ - VEC_DATA_TYPE(long, size) \ - b_64 = convert_long##size(b); \ - VEC_DATA_TYPE(long, size) \ - ab_64 = a_64 * b_64; \ - /* COMPMID-907 */ \ - VEC_DATA_TYPE(int, size) \ - ab_x2_high32 = convert_int##size(((ab_64 + (1 << 30)) >> 31)); \ - return select(ab_x2_high32, INT_MAX, overflow); \ - } - -/** Calculates \f$ exp(x) \f$ for x in [-1/4, 0). - * - * @param[in] size Size of vector. - * - * @return Result in fixed-point format Q0. 
- */ -#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \ - a) \ - { \ - const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \ - const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \ - const int k_fractional_bits = 31; \ - VEC_DATA_TYPE(int, size) \ - x = a + (1 << (k_fractional_bits - 3)); \ - VEC_DATA_TYPE(int, size) \ - x2 = ASYMM_MULT(x, x, size); \ - VEC_DATA_TYPE(int, size) \ - x3 = ASYMM_MULT(x2, x, size); \ - VEC_DATA_TYPE(int, size) \ - x4 = ASYMM_MULT(x2, x2, size); \ - VEC_DATA_TYPE(int, size) \ - x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \ - VEC_DATA_TYPE(int, size) \ - x4_over_24_plus_x3_over_6_plus_x2 = \ - ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \ - VEC_DATA_TYPE(int, size) \ - x4_over_24_plus_x3_over_6_plus_x2_over_2 = \ - ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \ - return constant_term + \ - ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \ - } - -/** Each bit of the result is set to the corresponding bit of either then_val or - * else_val depending on whether the corresponding bit of if_mask is set. - * Equivalent to the VBSL instruction in ARM NEON. - * - * @param[in] size Size of vector. - * - * @returns Result contaning bits from @p then_val or from @p else_val depending on corresponding - * bit in @p if_mask is set or not. - */ -#define ASYMM_SELECT_USING_MASK_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_select_using_mask##size(VEC_DATA_TYPE(int, size) if_mask, \ - VEC_DATA_TYPE(int, size) then_val, \ - VEC_DATA_TYPE(int, size) else_val) \ - { \ - return (if_mask & then_val) ^ (~if_mask & else_val); \ - } - -/** For each element of input vector, the corresponding bits of the result item are set - * if the input item is zero. 
- * - * @param[in] size Size of vector. - * - * @returns Output vector with bits set when corresponding bit in @p a is zero. - */ -#define ASYMM_MASK_IF_ZERO_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_mask_if_zero##size(VEC_DATA_TYPE(int, size) a) \ - { \ - const VEC_DATA_TYPE(int, size) all_zeros = 0; \ - const VEC_DATA_TYPE(int, size) all_ones = ~0; \ - return select(all_zeros, all_ones, a == 0); \ - } - -/** For each element of input vector, the corresponding bits of the result item are set - * if the input item is non-zero. - * - * @param[in] size Size of vector. - * - * @returns Output vector with bits set when corresponding bit in @p a is non zero. - */ -#define ASYMM_MASK_IF_NON_ZERO_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_mask_if_non_zero##size(VEC_DATA_TYPE(int, size) a) \ - { \ - const VEC_DATA_TYPE(int, size) all_zeros = 0; \ - const VEC_DATA_TYPE(int, size) all_ones = ~0; \ - return select(all_zeros, all_ones, a != 0); \ - } - -#define EXP_BARREL_SHIFTER_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) exp_barrel_shifter##size( \ - VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \ - int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \ - { \ - if (k_integer_bits > exponent) \ - { \ - const int k_shift_amount = k_integer_bits > exponent ? k_fractional_bits + exponent : 0; \ - return ASYMM_SELECT_USING_MASK( \ - ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \ - ASYMM_MULT(result, fp_multiplier, size), result, size); \ - } \ - \ - return result; \ - } - -/** Calculates \f$ exp(x) \f$ for x < 0. - * - * @param[in] size Size of vector. - * - * @return Result in fixed-point format Q0. 
- */ -#define ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \ - { \ - const int k_fractional_bits = 31 - k_integer_bits; \ - VEC_DATA_TYPE(int, size) \ - k_one_quarter = 1 << (k_fractional_bits - 2); \ - VEC_DATA_TYPE(int, size) \ - mask = k_one_quarter - 1; \ - VEC_DATA_TYPE(int, size) \ - a_mod_quarter_minus_one_quarter = (a & mask) - k_one_quarter; \ - VEC_DATA_TYPE(int, size) \ - a_mod_quarter_minus_one_quarter_scaled = a_mod_quarter_minus_one_quarter << k_integer_bits; \ - VEC_DATA_TYPE(int, size) \ - result = ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL( \ - a_mod_quarter_minus_one_quarter_scaled, size); \ - VEC_DATA_TYPE(int, size) \ - remainder = a_mod_quarter_minus_one_quarter - a; \ - \ - result = EXP_BARREL_SHIFTER(result, -2, 1672461947, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, -1, 1302514674, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +0, 790015084, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +1, 290630308, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +2, 39332535, k_integer_bits, k_fractional_bits, \ - remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, \ - size); \ - result = \ - EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \ - \ - if (k_integer_bits > 5) \ - { \ - const VEC_DATA_TYPE(int, size) clamp = -(1 << (k_fractional_bits + 5)); \ - result = ASYMM_SELECT_USING_MASK(ASYMM_MASK_IF_NON_ZERO(a < clamp, size), 0, result, size); \ - } \ - \ - const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \ - return ASYMM_SELECT_USING_MASK(ASYMM_MASK_IF_ZERO(a, size), Q0_one, result, size); \ - } - -/** Calculates 
the product of a integer value by a power of two, with either a positive exponent - * (equivalent to an arithmetic left shift, saturating) or a negative exponent - * (equivalent to an arithmetic right shift, rounding to nearest). - * - * @param[in] size Size of vector. - * - * @return Arithmetic left or right shift. - */ -#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \ - { \ - if (exponent < 0) \ - { \ - return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \ - } \ - \ - const VEC_DATA_TYPE(int, size) min = INT_MIN; \ - const VEC_DATA_TYPE(int, size) max = INT_MAX; \ - int threshold = ((1 << (31 - exponent)) - 1); \ - VEC_DATA_TYPE(int, size) \ - positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \ - VEC_DATA_TYPE(int, size) \ - negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \ - VEC_DATA_TYPE(int, size) \ - result = x << exponent; \ - result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \ - result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \ - return result; \ - } - -/** Calculates (a+b)/2, rounded to the nearest integer. - * Equivalent to VRHADD in the ARM NEON instruction set. - * - * @param[in] size Size of vector. - * - * @return (a+b)/2, rounded to the nearest integer. 
- */ -#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \ - { \ - VEC_DATA_TYPE(long, size) \ - a64 = convert_long##size(a); \ - VEC_DATA_TYPE(long, size) \ - b64 = convert_long##size(b); \ - VEC_DATA_TYPE(long, size) \ - sum = a64 + b64; \ - const VEC_DATA_TYPE(long, size) one = 1; \ - const VEC_DATA_TYPE(long, size) minus_one = -1; \ - VEC_DATA_TYPE(long, size) \ - sign = select(minus_one, one, sum >= 0); \ - return convert_int##size((sum + sign) / 2); \ - } - -/** Calculates \f$ 1 / (1 + x) \f$ for x in (0, 1). - * - * @param[in] size Size of vector. - * - * @return Result in fixed-point format Q0. - */ -#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \ - { \ - const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \ - const VEC_DATA_TYPE(int, size) Q2_one = 1 << (31 - 2); \ - VEC_DATA_TYPE(int, size) \ - half_denominator = ASYMM_ROUNDING_HALF_SUM(a, Q0_one, size); \ - const VEC_DATA_TYPE(int, size) Q2_48_over_17 = 1515870810; \ - const VEC_DATA_TYPE(int, size) Q2_neg_32_over_17 = -1010580540; \ - VEC_DATA_TYPE(int, size) \ - x = Q2_48_over_17 + ASYMM_MULT(half_denominator, Q2_neg_32_over_17, size); \ - for (int i = 0; i < 3; i++) \ - { \ - VEC_DATA_TYPE(int, size) \ - half_denominator_times_x = ASYMM_MULT(half_denominator, x, size); \ - VEC_DATA_TYPE(int, size) \ - one_minus_half_denominator_times_x = Q2_one - half_denominator_times_x; \ - VEC_DATA_TYPE(int, size) \ - tmp = ASYMM_MULT(x, one_minus_half_denominator_times_x, size); \ - x = x + ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(tmp, 2, size); \ - } \ - return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(x, 1, size); \ - } - -/** Considering the integer value as fixed-point, change the number of integer bits and update value - * accordingly. - * - * @param[in] size Size of vector. 
- * - * @return Rescaled value. - */ -#define ASYMM_RESCALE_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) asymm_rescale##size(VEC_DATA_TYPE(int, size) value, \ - int src_integer_bits, int dst_integer_bits) \ - { \ - int exponent = src_integer_bits - dst_integer_bits; \ - return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(value, exponent, size); \ - } - -#define ASYMM_ROUNDING_DIVIDE_BY_POW2(x, exponent, size) \ - asymm_rounding_divide_by_POW2_##size(x, exponent) -#define ASYMM_MULT(a, b, size) asymm_mult##size(a, b) -#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(x, quantized_multiplier, right_shift, size) \ - ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(x, quantized_multiplier, size), right_shift, size) -#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL(a, size) \ - asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(a) -#define ASYMM_SELECT_USING_MASK(if_mask, then_val, else_val, size) \ - asymm_select_using_mask##size(if_mask, then_val, else_val) -#define ASYMM_MASK_IF_ZERO(a, size) asymm_mask_if_zero##size(a) -#define ASYMM_MASK_IF_NON_ZERO(a, size) asymm_mask_if_non_zero##size(a) -#define EXP_BARREL_SHIFTER(result, exponent, fp_multiplier, k_integer_bits, k_fractional_bits, \ - remainder, size) \ - exp_barrel_shifter##size(result, exponent, fp_multiplier, k_integer_bits, k_fractional_bits, \ - remainder) -#define ASYMM_EXP_ON_NEGATIVE_VALUES(a, k_integer_bits, size) \ - asymm_exp_on_negative_values##size(a, k_integer_bits) -#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1(a, size) \ - asymm_one_over_one_plus_x_for_x_in_0_1##size(a) -#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(x, exponent, size) \ - asymm_saturating_rounding_mult_by_pow2##size(x, exponent) -#define ASYMM_ROUNDING_HALF_SUM(a, b, size) asymm_rounding_half_sum##size(a, b) -#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \ - asymm_rescale##size(value, src_integer_bits, dst_integer_bits) - -ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2) 
-ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4) -ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8) -ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16) - -ASYMM_MULT_IMPL(2) -ASYMM_MULT_IMPL(4) -ASYMM_MULT_IMPL(8) -ASYMM_MULT_IMPL(16) - -ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(2) -ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(4) -ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(8) -ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(16) - -ASYMM_SELECT_USING_MASK_IMPL(2) -ASYMM_SELECT_USING_MASK_IMPL(4) -ASYMM_SELECT_USING_MASK_IMPL(8) -ASYMM_SELECT_USING_MASK_IMPL(16) - -ASYMM_MASK_IF_ZERO_IMPL(2) -ASYMM_MASK_IF_ZERO_IMPL(4) -ASYMM_MASK_IF_ZERO_IMPL(8) -ASYMM_MASK_IF_ZERO_IMPL(16) - -ASYMM_MASK_IF_NON_ZERO_IMPL(2) -ASYMM_MASK_IF_NON_ZERO_IMPL(4) -ASYMM_MASK_IF_NON_ZERO_IMPL(8) -ASYMM_MASK_IF_NON_ZERO_IMPL(16) - -EXP_BARREL_SHIFTER_IMPL(2) -EXP_BARREL_SHIFTER_IMPL(4) -EXP_BARREL_SHIFTER_IMPL(8) -EXP_BARREL_SHIFTER_IMPL(16) - -ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(2) -ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(4) -ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(8) -ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(16) - -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(2) -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(4) -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(8) -ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(16) - -ASYMM_ROUNDING_HALF_SUM_IMPL(2) -ASYMM_ROUNDING_HALF_SUM_IMPL(4) -ASYMM_ROUNDING_HALF_SUM_IMPL(8) -ASYMM_ROUNDING_HALF_SUM_IMPL(16) - -ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(2) -ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(4) -ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(8) -ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(16) - -ASYMM_RESCALE_IMPL(2) -ASYMM_RESCALE_IMPL(4) -ASYMM_RESCALE_IMPL(8) -ASYMM_RESCALE_IMPL(16) - -#endif // ARM_COMPUTE_HELPERS_ASYMM_H
\ No newline at end of file diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl deleted file mode 100644 index e3aa463db..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) -/** Performs a negation of input tensor. - * - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * - * @param[in] in_ptr Pointer to the source image. Supported data types: S16/S32/F16/F32. - * @param[in] in_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes) - * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. 
Supported data types: same as @p input_ptr - * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes) - * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image - */ -__kernel void neg_tensor( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VSTORE(VEC_SIZE) - (-VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr), 0, (__global DATA_TYPE *)output.ptr); -} -#endif // defined(DATA_TYPE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl deleted file mode 100644 index ecf4696e9..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) && defined(ZERO_VALUE) -/** Perform space to depth rearrangement of tensor - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. 
-DDATA_TYPE=float - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16 - * @attention Input dimensions should be passed as a preprocessor argument using -DIW(width), -DIH(height), -DID(depth) and -DIB(batch). e.g. -DIW = 4 - * @attention The value to be set by pad value using -DZERO_VALUE=value. e.g. -DZERO_VALUE=0 - * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: same as @p inpu -t_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in -bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - * - * @param[in] pad_values Padding values for each of the dimensions. Only pad values for Up(for - * batch), Top(for height), Left(for width) and Front(for depth) are - * required. 
Supported data type: S32 - */ - -__kernel void pad( - TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - const int4 pad_values) - { - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int index[4]={0}; - - index[0] = get_global_id(0);//W - index[1] = get_global_id(1);//H - index[2] = get_global_id(2) % DEPTH_OUT;//C - index[3] = get_global_id(2) / DEPTH_OUT;//N - - if (index[0] < pad_values.x || index[0] >= (IW + pad_values.x) || - index[1] < pad_values.y || index[1] >= (IH + pad_values.y) || - index[2] < pad_values.z || index[2] >= (ID + pad_values.z) || - index[3] < pad_values.w || index[3] >= (IB + pad_values.w)) - { - *((__global DATA_TYPE *)out.ptr) = (DATA_TYPE)ZERO_VALUE; - } - else - { - *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *) - tensor4D_offset(&in, index[0] - pad_values.x, - index[1] - pad_values.y, - index[2] - pad_values.z, - index[3] - pad_values.w)); - } - } - -#endif //if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) && defined(ZERO_VALUE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl deleted file mode 100644 index 7cc8b0354..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && defined(P4) -/** Perform a Generic permute operation on an input tensor of Shape DCHW. - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16 - * @attention Permutation vector is passed as a preprocessor arguement using -DP1, -DP2, -DP3 and -DP4=int, e.g. -DP1=2 - * - * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U1 -6/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in b -ytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in b -ytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in b -ytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: same as @p inpu -t_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in -bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void permute_generic( - TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output)) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0); - - int out_index[4]; - int in_index[4]; - in_index[0] = get_global_id(0);//W - in_index[1] = get_global_id(1);//H - in_index[2] = get_global_id(2) % DEPTH_IN;//C - in_index[3] = get_global_id(2) / DEPTH_IN;//B - out_index[0] = in_index[P1]; - out_index[1] = in_index[P2]; - out_index[2] = in_index[P3]; - out_index[3] = in_index[P4]; - - *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0],out_index[1],out_index[2],out_index[3])) = *((__global DATA_TYPE *)in.ptr); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && defined(P4) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl deleted file mode 100644 index aa05121b1..000000000 --- 
a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifdef SATURATE -#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x)) -#else /* SATURATE */ -#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x)) -#endif /* SATURATE */ -#define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round) - -/** Performs a pixelwise division with float scale of either integer or float inputs. - * - * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short - * @attention The data type of the intermediate result of the division should passed as well using -DDATA_TYPE_RES. - * e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short. - * @attention -DDATA_TYPE_FLOAT must be passed if floating point inputs are provided. - * - * @param[in] in1_ptr Pointer to the source image. 
Supported data types: U8, S16, F16, F32 - * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32 - * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. 
Supported data types: U8, S16, F16, F32 - * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] scale Float scaling factor. Supported data types: F32 - */ -__kernel void pixelwise_div_float( - TENSOR3D_DECLARATION(in1), - TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out), - const float scale) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load data - VEC_DATA_TYPE(DATA_TYPE_RES, 16) - in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); - VEC_DATA_TYPE(DATA_TYPE_RES, 16) - in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); - - // Perform division -#ifdef DATA_TYPE_FLOAT - VEC_DATA_TYPE(DATA_TYPE_OUT, 16) - res = CONVERT(in1_data / in2_data * (DATA_TYPE_RES)scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)); -#else /* DATA_TYPE_FLOAT */ - VEC_DATA_TYPE(DATA_TYPE_OUT, 16) - res = CONVERT_OP_FLOAT(CONVERT_OP_FLOAT((convert_float16(in1_data / in2_data) * scale), VEC_DATA_TYPE(DATA_TYPE_RES, 16), ROUND), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), ROUND); -#endif /* DATA_TYPE_FLOAT */ - - // Store result - vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr); -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl 
b/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl deleted file mode 100644 index fdfb78003..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(SATURATE) -#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size##_sat(x)) -#else // SATURATE -#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size(x)) -#endif // SATURATE -#define CONVERT_OP_INT(x, type, size) CONVERT_OP_INT_STR(x, type, size) - -#define DIV_OP(x, y, scale, type, size) CONVERT_OP_INT((x) / (y) >> scale, type, size) - -/** Performs a pixelwise division with integer scale of integer inputs. - * - * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short - * @attention The data_type of the intermediate result of the division should passed as well using -DDATA_TYPE_RES. - * e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short. - * - * @param[in] in1_ptr Pointer to the source image. 
Supported data types: U8/S16 - * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] in2_ptr Pointer to the source image. Supported data types: same as @p in1_ptr - * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. 
Supported data types: same as @p in1_ptr - * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] scale Integer scaling factor. Supported data types: S32 - */ -__kernel void pixelwise_div_int( - TENSOR3D_DECLARATION(in1), - TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out), - const uint scale) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load data - VEC_DATA_TYPE(DATA_TYPE_RES, 16) - in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); - VEC_DATA_TYPE(DATA_TYPE_RES, 16) - in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); - - // Perform division and store result - vstore16(DIV_OP(in1_data, in2_data, scale, DATA_TYPE_OUT, 16), 0, (__global DATA_TYPE_OUT *)out.ptr); -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl deleted file mode 100644 index ab1307e64..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers_asymm.h" - -#ifdef SATURATE -#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x)) -#else /* SATURATE */ -#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x)) -#endif /* SATURATE */ -#define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round) - -#if defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT) -/** Performs a pixelwise multiplication used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 - * - * The following computations will be performed by the kernel: - * - * -# Add offset terms to inputs - * -# Multiply inputs - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Shift the int32 accumulator by result_shift - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. - * - * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. 
-DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar - * @attention The offset factor of inputs must be passed at compile time using -DIN1_OFFSET and -DIN2_OFFSET - * @attention The offset, scalar scale factor and number of bits to shift right of output tensor must be passed at compile time using -DRESULT_OFFSET, -RESULT_MULT_INT and -DRESULT_SHIFT - * - * @param[in] in1_ptr Pointer to the source image. Supported data types: U8 - * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] in2_ptr Pointer to the source image. Supported data types: U8 - * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. 
Supported data types: U8 - * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] scale Float scaling factor. Supported data types: F32 - */ -__kernel void pixelwise_mul_qasymm8( - TENSOR3D_DECLARATION(in1), - TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out), - const float scale) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load data - VEC_DATA_TYPE(int, 16) - in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(int, 16)); - VEC_DATA_TYPE(int, 16) - in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(int, 16)); - - // Perform multiplication of two inputs - VEC_DATA_TYPE(int, 16) in1_val = in1_data + (VEC_DATA_TYPE(int, 16))(IN1_OFFSET); - VEC_DATA_TYPE(int, 16) in2_val = in2_data + (VEC_DATA_TYPE(int, 16))(IN2_OFFSET); - VEC_DATA_TYPE(int, 16) out_val = in1_val * in2_val; - - // Multiply with a multiplier smaller than 1 - out_val = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16); - out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET); - - VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16)); - -// TODO: Apply min-max BOUND to support fuse with relu. 
-/* -#if defined(MIN_BOUND) - res = max(res, (uchar16)MIN_BOUND); -#endif // defined(MIN_BOUND) -#if defined(MAX_BOUND) - res = min(res, (uchar16)MAX_BOUND); -#endif // defined(MAX_BOUND) -*/ - - // Store result - VSTORE(16)(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), - 0, (__global DATA_TYPE_OUT *)out.ptr); -} -#endif // defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl deleted file mode 100644 index 68da2ba32..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) -/** Returns result of prelu function implemented as below: - * f(input) = alpha * input for input < 0, f(input) = input for input >= 0. - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @note Can only take floating point data types. - * - * @param[in] input1_ptr Pointer to the source image. 
Supported Data types : F16/F32 - * @param[in] input1_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source image - * - * @param[in] alpha_ptr Pointer to the source image. Supported Data types : F16/F32 - * @param[in] alpha_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] alpha_step_x input2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] alpha_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] alpha_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] alpha_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source image - * - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void prelu( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(alpha), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VSTORE(VEC_SIZE) - (VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) < 0 ? - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) * VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)alpha.ptr) : - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr), - 0, (__global DATA_TYPE *)output.ptr); - -} -#endif // defined(DATA_TYPE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl deleted file mode 100644 index 7e97b7ed6..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" -#define SUB(x, y) (x) - (y) - -#if defined(OFF_IN1) && defined(OFF_IN2) && defined(OFF_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(VEC_SIZE) - -#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE) -#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE) -#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE) -#define CONVERT_RTE(x, type) (convert_##type##_rte((x))) -#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type) - -/** Returns result of prelu function implemented as below: - * f(input) = alpha * input for input < 0, f(input) = input for input >= 0. - * - * @attention Data type can be passed using the -DDATA_TYPE_IN compile flag, e.g. -DDATA_TYPE_IN=uchar - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @note Can only take uchar data types. - * - * @param[in] input1_ptr Pointer to the source image. 
Supported Data types : QASYMM8 - * @param[in] input1_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source image - * - * @param[in] alpha_ptr Pointer to the source image. Supported Data types : QASYMM8 - * @param[in] alpha_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] alpha_step_x input2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] alpha_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] alpha_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] alpha_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source image - * - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void prelu_qasymm8( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(alpha), - TENSOR3D_DECLARATION(output)) -{ - // Get pixels pointer - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)input.ptr), VEC_INT); - VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)alpha.ptr), VEC_INT); - - in_a = SUB(in_a, (VEC_INT)((int)OFF_IN1)); - in_b = SUB(in_b, (VEC_INT)((int)OFF_IN2)); - - const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1); - const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2); - const VEC_FLOAT outf32 = in1f32 < 0 ? 
in1f32 * in2f32 : in1f32; - const VEC_FLOAT qresf32 = outf32 / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFF_OUT)); - const VEC_UCHAR res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR); - - VSTORE(VEC_SIZE) - (res, 0, (__global uchar *)output.ptr); -} - -#endif // defined(OFF_IN1) && defined(OFF_IN2) && defined(OFF_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(VEC_SIZE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl deleted file mode 100644 index 8bef49363..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) -/** Perform reduce max/min - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16 - * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using - * -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input_ptr Pointer to the source image. 
Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] input_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] axis Axis through which reduction occurs - * @param[in] dim Dimension across 
the axis to be reduced. - */ -__kernel void reduce_min_max(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - const int axis, - const int dim) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int indices[4] = - { - get_global_id(0), - get_global_id(1), - get_global_id(2) % DEPTH_OUT, - get_global_id(2) / DEPTH_OUT, - }; - - DATA_TYPE value = *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])); - for(int i = 1; i < dim; ++i) - { - indices[axis] = i; - - #if OP_CODE == 1 // REDUCE_MAX - value = max(value, *((__global DATA_TYPE *) - tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]))); - - #elif OP_CODE == 2 // REDUCE_MIN - value = min(value, *((__global DATA_TYPE *) - tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]))); - - #else // OP NOT SUPPORTED - return; - - #endif - } - - *((__global DATA_TYPE *)out.ptr) = value; -} - -/** Perform reduce sum/mean - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16 - * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using - * -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input_ptr Pointer to the source image. 
Supported data types: U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] input_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] axis Axis through which reduction occurs - * @param[in] dim Dimension across the axis 
to be reduced. - */ -__kernel void reduce_sum_mean(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - const int axis, - const int dim) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int indices[4] = - { - get_global_id(0), - get_global_id(1), - get_global_id(2) % DEPTH_OUT, - get_global_id(2) / DEPTH_OUT, - }; - - DATA_TYPE sum_value = (DATA_TYPE)0; - for(int i = 0; i < dim; ++i) - { - indices[axis] = i; - sum_value += *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])); - } - - #if OP_CODE == 3 // REDUCE_SUM - *((__global DATA_TYPE *)out.ptr) = sum_value; - - #elif OP_CODE == 4 // REDUCE_MEAN - *((__global DATA_TYPE *)out.ptr) = sum_value / CONVERT(dim, DATA_TYPE); - - #else // OP NOT SUPPORTED - return; - - #endif -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl deleted file mode 100644 index a0fc2d5a9..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && defined(WIDTH_IN) && defined(ZERO_VALUE) -/** Perform space to batch with input of 4D and NCHW format - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16 - * @attention Input tensor batch should be given as a preprocessor argument using -DBATCH_IN=size. e.g. -DBATCH_IN=16 - * @attention Input tensor height should be given as a preprocessor argument using -DHEIGHT_IN=size. e.g. -DHEIGHT_IN=16 - * @attention Input tensor width should be given as a preprocessor argument using -DHEIGHT_IN=size. e.g. -DWIDTH_IN=16 - * @attention The value to be set by pad value using -DZERO_VALUE=value. e.g. -DZERO_VALUE=0 - * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_stride_w Stride of the destination tensor in W dimension (in bytes) - * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] block_size_ptr Pointer to the source tensor. Supported data types: S32 - * @param[in] block_size_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] block_size_step_x block_size_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] block_size_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] padding_size_ptr Pointer to the source tensor. 
Supported data types: S32 - * @param[in] padding_size_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] padding_size_step_x padding_size_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] padding_size_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] padding_size_step_y padding_size_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] padding_size_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void space_to_batch_4d_nchw(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - VECTOR_DECLARATION(block_size), - IMAGE_DECLARATION(padding_size)) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int block_size_x = *((__global int *)(block_size_ptr)); - int block_size_y = *((__global int *)(block_size_ptr + block_size_stride_x)); - int shift_x = (get_global_id(2) / DEPTH_OUT / BATCH_IN) % block_size_x; - int shift_y = (get_global_id(2) / DEPTH_OUT / BATCH_IN) / block_size_x; - - int in_index[4] = {0, }; - in_index[0] = get_global_id(0) * block_size_x + shift_x - *((__global int *)(padding_size_ptr)); - in_index[1] = get_global_id(1) * block_size_y + shift_y - *((__global int *)(padding_size_ptr + padding_size_stride_y)); - in_index[2] = get_global_id(2) % DEPTH_OUT; - in_index[3] = (get_global_id(2) / DEPTH_OUT) % BATCH_IN; - - if (in_index[0] < 0 || in_index[0] >= WIDTH_IN || in_index[1] < 0 || in_index[1] >= HEIGHT_IN) - { - *((__global DATA_TYPE *)out.ptr) = (DATA_TYPE)ZERO_VALUE; - } - else - { - *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_index[0], in_index[1], in_index[2], in_index[3])); - } -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && defined(WIDTH_IN) && defined(ZERO_VALUE) - -#if defined(DATA_TYPE) && 
defined(HEIGHT_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && defined(WIDTH_IN) && defined(ZERO_VALUE) && defined(VEC_SIZE) -/** Perform space to batch with input of 4D and NHWC format - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Output tensor depth should be given as a preprocessor argument using -DHEIGHT_OUT=size. e.g. -DHEIGHT_OUT=16 - * @attention Input tensor batch should be given as a preprocessor argument using -DBATCH_IN=size. e.g. -DBATCH_IN=16 - * @attention Input tensor height should be given as a preprocessor argument using -DHEIGHT_IN=size. e.g. -DHEIGHT_IN=16 - * @attention Input tensor width should be given as a preprocessor argument using -DHEIGHT_IN=size. e.g. -DWIDTH_IN=16 - * @attention The value to be set by pad value using -DZERO_VALUE=value. e.g. -DZERO_VALUE=0 - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * - * @param[in] input_ptr Pointer to the source tensor. 
Supported data types: U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_stride_w Stride of the destination tensor in W dimension (in bytes) - * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] block_size_ptr Pointer to the source tensor. 
Supported data types: S32 - * @param[in] block_size_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] block_size_step_x block_size_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] block_size_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] padding_size_ptr Pointer to the source tensor. Supported data types: S32 - * @param[in] padding_size_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] padding_size_step_x padding_size_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] padding_size_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] padding_size_step_y padding_size_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] padding_size_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void space_to_batch_4d_nhwc(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - VECTOR_DECLARATION(block_size), - IMAGE_DECLARATION(padding_size)) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, HEIGHT_OUT); - - int block_size_x = *((__global int *)(block_size_ptr)); - int block_size_y = *((__global int *)(block_size_ptr + block_size_stride_x)); - int shift_x = (get_global_id(2) / HEIGHT_OUT / BATCH_IN) % block_size_x; - int shift_y = (get_global_id(2) / HEIGHT_OUT / BATCH_IN) / block_size_x; - - int in_index[4] = {0, }; - in_index[0] = get_global_id(0) * VEC_SIZE; - in_index[1] = get_global_id(1) * block_size_x + shift_x - *((__global int *)(padding_size_ptr)); - in_index[2] = get_global_id(2) % HEIGHT_OUT * block_size_y + shift_y - *((__global int *)(padding_size_ptr + padding_size_stride_y)); - in_index[3] = (get_global_id(2) / HEIGHT_OUT) % BATCH_IN; - - if (in_index[1] < 0 || in_index[1] >= WIDTH_IN || in_index[2] < 0 || 
in_index[2] >= HEIGHT_IN) - { - VSTORE(VEC_SIZE)((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))ZERO_VALUE, 0, (__global DATA_TYPE *)out.ptr); - } - else - { - VSTORE(VEC_SIZE)(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor4D_offset(&in, in_index[0], in_index[1], in_index[2], in_index[3])), - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), - 0, (__global DATA_TYPE *)out.ptr); - } -} - -#endif // defined(DATA_TYPE) && defined(HEIGHT_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && defined(WIDTH_IN) && defined(ZERO_VALUE) && defined(VEC_SIZE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl deleted file mode 100644 index f6977045a..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) -/** Perform space to depth rearrangement of tensor - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16 - * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g. 
-DBLOCK_SIZE=1 - * - * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p inpu -t_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in -bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void space_to_depth( - TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output)) - { - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN); - Tensor4D out = 
CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0); - - int out_index[4]={0}; - int in_index[4]={0}; - - in_index[0] = get_global_id(0);//W - in_index[1] = get_global_id(1);//H - in_index[2] = get_global_id(2) % DEPTH_IN;//C - in_index[3] = get_global_id(2) / DEPTH_IN;//B - - out_index[0] = in_index[0]/BLOCK_SIZE; - out_index[1] = in_index[1]/BLOCK_SIZE; - out_index[2] = in_index[2] + ((in_index[1] % BLOCK_SIZE) * BLOCK_SIZE + in_index[0] % BLOCK_SIZE) * DEPTH_IN; - out_index[3] = in_index[3]; - - *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0],out_index[1],out_index[2],out_index[3])) = *((__global DATA_TYPE *)in.ptr); - } -#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/squared_difference.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/squared_difference.cl deleted file mode 100644 index 3e1a5c97f..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/squared_difference.cl +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) -/** Returns true value of squared_difference of two tensors. - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. 
-DDATA_TYPE=float - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @note Can only take floating point data types. - * - * @param[in] input1_ptr Pointer to the source image. Supported data types: F16/F32 - * @param[in] input1_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source image - * - * @param[in] input2_ptr Pointer to the source image. Supported data types: F16/F32 - * @param[in] input2_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input2_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source image - * - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: F16/F32 - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void squared_difference( - TENSOR3D_DECLARATION(input1), - TENSOR3D_DECLARATION(input2), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input1 = CONVERT_TO_TENSOR3D_STRUCT(input1); - Tensor3D input2 = CONVERT_TO_TENSOR3D_STRUCT(input2); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - diff = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr)- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr); - - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - sq_diff = diff * diff; - - VSTORE(VEC_SIZE) - (sq_diff, 0, (__global DATA_TYPE *)output.ptr); -} -#endif // defined(DATA_TYPE) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl deleted file mode 100644 index b39c55b96..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(ELEMENT_DATA_TYPE) && defined(DEPTH_OUT) -/** Extracts a strided slice up to 4-dimensions - * - * @note Datatype should be given as a preprocessor argument using -DELEMENT_DATA_TYPE=type. e.g. -DELEMENT_DATA_TYPE=short - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16 - * - * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. 
Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] starts The stride of X dimension of input tensor to be sliced. Supported data types: S32 - * @param[in] strides The stride of Y dimension of input tensor to be sliced. 
Supported data types: S32 - */ -__kernel void strided_slice_ex(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - const int4 starts, - const int4 strides) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int4 indices_in = - { - starts.x + (strides.x * get_global_id(0)), - starts.y + (strides.y * get_global_id(1)), - starts.z + (strides.z * (get_global_id(2) % DEPTH_OUT)), - starts.w + (strides.w * (get_global_id(2) / DEPTH_OUT)), - }; - *((__global ELEMENT_DATA_TYPE *)out.ptr) = *((__global ELEMENT_DATA_TYPE *)tensor4D_offset(&in, indices_in.x, indices_in.y, indices_in.z, indices_in.w)); -} -#endif // defined(ELEMENT_DATA_TYPE) && defined(DEPTH_OUT) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl deleted file mode 100644 index d97f23a47..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "helpers.h" - -__kernel void topkv2_init(VECTOR_DECLARATION(input), - __global float* in_key_buf, - __global int* in_ind_buf, - const int n) -{ - int gid = get_global_id(0); - int lws = get_local_size(0); - int groups = get_num_groups(0); - int gws = lws * groups; - int iter = n / gws; - - Vector input = CONVERT_TO_VECTOR_STRUCT_NO_STEP(input); - - for(int i = 0; i < iter; ++i) - { - int idx = i * gws + gid; - in_key_buf[idx] = *(__global float*)(input.ptr + idx * input.stride_x); - in_ind_buf[idx] = idx; - } -} - -__kernel void topkv2_find_first_negative( - __global float *out_key_buf, - __global int *first_negative_idx, - int n) -{ - int gid = get_global_id(0); - - if( gid == n - 1 ) - { - // if the last item is positive, the first negative index is n. - if( out_key_buf[gid] > 0.f ) - *first_negative_idx = n; - } else if ( gid == 0 ) { - // if the first item is negative, set it 0. - if( out_key_buf[gid] < 0.f ) - *first_negative_idx = 0; - } else { - // if its left is positive and it is negative, then it is the first negative item. 
- if( out_key_buf[gid-1] > 0.f && out_key_buf[gid] < 0.f ) - *first_negative_idx = gid; - } -} - -__kernel void topkv2_reorder_negatives( - __global float* in_key_buf, - __global float* out_key_buf, - __global float* in_ind_buf, - __global float* out_ind_buf, - __global int* first_negative_idx, - int n) -{ - int gid = get_global_id(0); - - int num_negs = n - *first_negative_idx; - int in_idx; - - if( gid < num_negs ) { - in_idx = n - 1 - gid; - } else { - in_idx = gid - num_negs; - } - - out_key_buf[gid] = in_key_buf[in_idx]; - out_ind_buf[gid] = in_ind_buf[in_idx]; -} - -__kernel void topkv2_store( - VECTOR_DECLARATION(values), - VECTOR_DECLARATION(indices), - __global float *out_key_buf, - __global int *out_ind_buf, - int n) -{ - int gid = get_global_id(0); - - Vector values = CONVERT_TO_VECTOR_STRUCT_NO_STEP(values); - Vector indices = CONVERT_TO_VECTOR_STRUCT_NO_STEP(indices); - - int idx = n - 1 - gid; - - *(__global float*)(values.ptr + gid * values.stride_x) = out_key_buf[idx]; - *(__global int*)(indices.ptr + gid * indices.stride_x) = out_ind_buf[idx]; -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl deleted file mode 100644 index 0292fab04..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "helpers.h" - -__global inline float* get_vec_elem(Vector* vec, int idx) -{ - return (__global float*)(vec->ptr + idx * vec->stride_x); -} - -__global inline int* get_vec_elem_int(Vector* vec, int idx) -{ - return (__global int*)(vec->ptr + idx * vec->stride_x); -} - -// A utility function to swap two elements -void swap(__global float *a, __global float *b) -{ - float t = *a; - *a = *b; - *b = t; -} - -void swap_idx(__global int *a, __global int *b) -{ - int t = *a; - *a = *b; - *b = t; -} - -/* This function is same in both iterative and recursive*/ -int partition (Vector* arr, __global int* indices, int l, int h) -{ - float x = *get_vec_elem(arr, h); - int i = (l - 1); - - for (int j = l; j <= h- 1; j++) - { - if (*get_vec_elem(arr, j) >= x) - { - i++; - swap (get_vec_elem(arr,i), get_vec_elem(arr,j)); - swap_idx(&indices[i], &indices[j]); - } - } - swap (get_vec_elem(arr, i + 1), get_vec_elem(arr, h)); - swap_idx(&indices[i + 1], &indices[h]); - return (i + 1); -} - -/* A[] --> Array to be sorted, - l --> Starting index, - h --> Ending index */ -void quickSortIterative (Vector* arr, __global int* indices, - __global int *stack, int l, int h) -{ - // Create an auxiliary stack - - // initialize top of stack - int top = -1; - - // push initial values of l and h to stack - stack[ ++top ] = l; - stack[ ++top ] = h; - - // Keep popping from stack while is not empty - while ( top >= 0 ) - { - // Pop h and l - h = stack[ top-- ]; - l = stack[ top-- ]; - - // Set pivot element at its correct position - // in sorted array - int p = partition( arr, indices, l, h ); - - // If there are elements on left side of pivot, - // then push left side to stack - if ( p-1 > l ) - { - stack[ ++top ] = l; - stack[ ++top ] = p - 1; - } - - // If there are elements on right side of pivot, - // then push right side to stack - if ( p+1 < h ) - { - stack[ ++top ] = p + 
1; - stack[ ++top ] = h; - } - } -} - -__kernel void topkv2_quicksort(VECTOR_DECLARATION(input), - VECTOR_DECLARATION(topk_values), VECTOR_DECLARATION(topk_indices), - __global int* indices, __global int* temp_stack, int k, int n) -{ - Vector input = CONVERT_TO_VECTOR_STRUCT_NO_STEP(input); - Vector topk_values = CONVERT_TO_VECTOR_STRUCT_NO_STEP(topk_values); - Vector topk_indices = CONVERT_TO_VECTOR_STRUCT_NO_STEP(topk_indices); - - for( int i = 0; i < n; ++i ) - { - indices[i] = i; - } - - quickSortIterative(&input, indices, temp_stack, 0, n-1); - - // extract k items. - for(int i = 0; i < k; ++i) - { - *get_vec_elem(&topk_values, i) = *get_vec_elem(&input, i); - *get_vec_elem_int(&topk_indices, i) = indices[i]; - } -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl deleted file mode 100644 index c2c2d89a4..000000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// reference: -// https://code.google.com/archive/p/ocl-radix-sort/source/default/source -// OpenCL kernel sources for the CLRadixSort class -// the #include does not exist in OpenCL -// Copyright Philippe Helluy, Université de Strasbourg, France, 2011, helluy@math.unistra.fr -// licensed under the GNU Lesser General Public License see http://www.gnu.org/copyleft/lesser.html -// if you find this software usefull you can cite the following work in your reports or articles: -// Philippe HELLUY, A portable implementation of the radix sort algorithm in OpenCL, 2011. -// http://hal.archives-ouvertes.fr/hal-00596730 - -// Reference for floating point radix sort: -// http://www.codercorner.com/RadixSortRevisited.htm - -// compute the histogram for each radix and each virtual processor for the pass -__kernel void radixsort_histogram(__global float* in_key_buf, - __global int* d_Histograms, - const int pass, - __local int* loc_histo, - const int n) -{ - int it = get_local_id(0); // i local number of the processor - int ig = get_global_id(0); // global number = i + g I - - int gr = get_group_id(0); // g group number - - int groups = get_num_groups(0); - int items = get_local_size(0); - - // set the local histograms to zero - for(int ir=0;ir<_RADIX;ir++){ - loc_histo[ir * items + it] = 0; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - // range of keys that are analyzed by the work item - int size= n/groups/items; // size of the sub-list - int start= ig * size; // beginning of the sub-list - - unsigned int key; - int shortkey,k; - - // compute the index - // the computation depends on the transposition - for(int j = 0; j < size ; j++) { -#ifdef TRANSPOSE - k= groups * items * j + ig; -#else - k=j+start; -#endif - - key = *((__global unsigned int*)(in_key_buf + k)); - - // extract the group of _BITS bits of the pass - // the result is in the range 0.._RADIX-1 - shortkey=(( key >> (pass * _BITS)) & (_RADIX-1)); - - // increment the local histogram - loc_histo[shortkey * items 
+ it ]++; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - // copy the local histogram to the global one - for(int ir=0;ir<_RADIX;ir++) { - d_Histograms[items * (ir * groups + gr) + it] = loc_histo[ir * items + it]; - } - - barrier(CLK_GLOBAL_MEM_FENCE); -} - -// initial transpose of the list for improving -// coalescent memory access -__kernel void transpose(const __global int* invect, - __global int* outvect, - const int nbcol, - const int nbrow, - const __global int* inperm, - __global int* outperm, - __local int* blockmat, - __local int* blockperm, - const int tilesize){ - - int i0 = get_global_id(0)*tilesize; // first row index - int j = get_global_id(1); // column index - - int jloc = get_local_id(1); // local column index - - // fill the cache - for(int iloc=0;iloc<tilesize;iloc++){ - int k=(i0+iloc)*nbcol+j; // position in the matrix - blockmat[iloc*tilesize+jloc]=invect[k]; -#ifdef PERMUT - blockperm[iloc*tilesize+jloc]=inperm[k]; -#endif - } - - barrier(CLK_LOCAL_MEM_FENCE); - - // first row index in the transpose - int j0=get_group_id(1)*tilesize; - - // put the cache at the good place - for(int iloc=0;iloc<tilesize;iloc++){ - int kt=(j0+iloc)*nbrow+i0+jloc; // position in the transpose - outvect[kt]=blockmat[jloc*tilesize+iloc]; -#ifdef PERMUT - outperm[kt]=blockperm[jloc*tilesize+iloc]; -#endif - } - -} - -// each virtual processor reorders its data using the scanned histogram -__kernel void radixsort_reorder(__global float* in_key, - __global float* out_key, - __global int* d_Histograms, - const int pass, - __global int* indices_in, - __global int* indices_out, - __local int* loc_histo, - const int n){ - - int it = get_local_id(0); - int ig = get_global_id(0); - - int gr = get_group_id(0); - int groups=get_num_groups(0); - int items=get_local_size(0); - - int start= ig *(n/groups/items); - int size= n/groups/items; - - // take the histogram in the cache - for(int ir=0;ir<_RADIX;ir++){ - loc_histo[ir * items + it]= - d_Histograms[items * (ir * groups + gr) + 
it]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int newpos,shortkey,k,newpost; - unsigned int key; - - for(int j= 0; j< size;j++){ -#ifdef TRANSPOSE - k= groups * items * j + ig; -#else - k=j+start; -#endif - float org_value = in_key[k]; - key = *(__global unsigned int*)(in_key + k); - shortkey=((key >> (pass * _BITS)) & (_RADIX-1)); - - newpos=loc_histo[shortkey * items + it]; - -#ifdef TRANSPOSE - int ignew,jnew; - ignew= newpos/(n/groups/items); - jnew = newpos%(n/groups/items); - newpost = jnew * (groups*items) + ignew; -#else - newpost=newpos; -#endif - - //d_outKeys[newpost]= key; // killing line !!! - out_key[newpost] = org_value; - -#ifdef PERMUT - indices_out[newpost] = indices_in[k]; -#endif - - newpos++; - loc_histo[shortkey * items + it]=newpos; - } -} - -// perform a parallel prefix sum (a scan) on the local histograms -// (see Blelloch 1990) each workitem worries about two memories -// see also http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html -__kernel void radixsort_scanhistograms(__global int* histo, __local int* temp, __global int* globsum) -{ - int it = get_local_id(0); - int ig = get_global_id(0); - int decale = 1; - int n=get_local_size(0) * 2 ; - int gr=get_group_id(0); - - // load input into local memory - // up sweep phase - temp[2*it] = histo[2*ig]; - temp[2*it+1] = histo[2*ig+1]; - - // parallel prefix sum (algorithm of Blelloch 1990) - for (int d = n>>1; d > 0; d >>= 1){ - barrier(CLK_LOCAL_MEM_FENCE); - if (it < d){ - int ai = decale*(2*it+1)-1; - int bi = decale*(2*it+2)-1; - temp[bi] += temp[ai]; - } - decale *= 2; - } - - // store the last element in the global sum vector - // (maybe used in the next step for constructing the global scan) - // clear the last element - if (it == 0) { - globsum[gr]=temp[n-1]; - temp[n - 1] = 0; - } - - // down sweep phase - for (int d = 1; d < n; d *= 2){ - decale >>= 1; - barrier(CLK_LOCAL_MEM_FENCE); - - if (it < d){ - int ai = decale*(2*it+1)-1; - int bi = decale*(2*it+2)-1; - - int t = 
temp[ai]; - temp[ai] = temp[bi]; - temp[bi] += t; - } - - } - barrier(CLK_LOCAL_MEM_FENCE); - - // write results to device memory - - histo[2*ig] = temp[2*it]; - histo[2*ig+1] = temp[2*it+1]; - - barrier(CLK_GLOBAL_MEM_FENCE); - -} - -// use the global sum for updating the local histograms -// each work item updates two values -__kernel void radixsort_pastehistograms( __global int* histo,__global int* globsum) -{ - int ig = get_global_id(0); - int gr=get_group_id(0); - - int s; - - s=globsum[gr]; - - // write results to device memory - histo[2*ig] += s; - histo[2*ig+1] += s; - - barrier(CLK_GLOBAL_MEM_FENCE); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLActivationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLActivationLayerExKernel.cpp deleted file mode 100644 index 1fdd2f98f..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLActivationLayerExKernel.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLActivationLayerExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/UtilsEx.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfoEx &act_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::F16, DataType::F32); - - // Checks performed when output is configured - if ((output != nullptr) && (output->total_size() != 0)) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - if (output != nullptr) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, *input); - } - - const unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); - - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - bool window_changed = false; - - if (output != nullptr) - { - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - window_changed = update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->valid_region()); - } - else - { - window_changed = update_window_and_padding( - win, AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration)); - } - - Status err = (window_changed) - ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLActivationLayerExKernel::CLActivationLayerExKernel() - : _input(nullptr), _output(nullptr), _run_in_place(false) -{ -} - -void CLActivationLayerExKernel::configure(ICLTensor *input, ICLTensor *output, - ActivationLayerInfoEx act_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _run_in_place = (output == nullptr) || (output == input); - - if (output != nullptr) - { - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), *input->info()->clone()); - } - - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, act_info)); - - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const DataType dt = input->info()->data_type(); - float a_const = act_info.a(); - float b_const = act_info.b(); - int a_const_int = 0; - int b_const_int = 0; - - // Create quantized version of constants a, b if needed - if (is_data_type_quantized(dt)) - { - a_const_int = - input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP); - b_const_int = - input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP); - } - - // Set build options - std::set<std::string> build_opts; - build_opts.emplace( - ("-DACT=" + lower_string(string_from_activation_func_ex(act_info.activation())))); - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - if (is_data_type_quantized(dt)) - { - build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); - build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); - - const int o1 = input->info()->quantization_info().offset; - // Quantized value of 0 corresponds to the offset o1 - 
build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1))); - - // Set scale and offset of the input and output if they have different quantization info - if (is_data_type_quantized_asymmetric(dt) && output != nullptr) - { - const float s1 = input->info()->quantization_info().scale; - const float s2 = output->info()->quantization_info().scale; - const int o2 = output->info()->quantization_info().offset; - - if (o1 != o2 || s1 != s2) - { - build_opts.emplace(("-DS1_VAL=" + float_to_string_with_full_precision(s1))); - build_opts.emplace(("-DS2_VAL=" + float_to_string_with_full_precision(s2))); - build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1))); - build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2))); - } - } - } - else - { - build_opts.emplace(("-DA_VAL=" + float_to_string_with_full_precision(a_const))); - build_opts.emplace(("-DB_VAL=" + float_to_string_with_full_precision(b_const))); - } - - build_opts.emplace((_run_in_place) ? "-DIN_PLACE" : ""); - - // Create kernel - std::string kernel_name = std::string("activation_layer_ex"); - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Make sure _kernel is initialized before calling the parent's configure - _input = input; - _output = output; - - // Configure kernel window - auto win_config = - validate_and_configure_window(input->info(), (_run_in_place) ? 
nullptr : output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); - - // Set config_id for enabling LWS tuning - _config_id = "activation_layer_ex_"; - _config_id += lower_string(string_from_data_type(dt)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -Status CLActivationLayerExKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfoEx &act_info) -{ - const bool run_in_place = (output == nullptr) || (output == input); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info)); - ARM_COMPUTE_RETURN_ON_ERROR( - validate_and_configure_window(input->clone().get(), - (run_in_place) ? nullptr : output->clone().get()) - .first); - - return Status{}; -} - -void CLActivationLayerExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - if (!_run_in_place) - { - add_3D_tensor_argument(idx, _output, slice); - } - enqueue(queue, *this, slice, lws_hint()); - } while (collapsed.slide_window_slice_3D(slice)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxKernel.cpp deleted file mode 100644 index c1a2ad0be..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxKernel.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t argminmax_axis) -{ - TensorShape out_shape{input_shape}; - - out_shape.set(argminmax_axis, 1); - - return out_shape; -} -} // namespace - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const uint32_t argminmax_axis, ArgOperation op) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32, DataType::F32, - DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0, - "Inputs are not broadcast compatible"); - - const TensorShape output_shape = inferOutputShape(input->tensor_shape(), argminmax_axis); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(), - "output shape's size does not match argminmax_axis"); - - const auto num_dimensions = input->tensor_shape().num_dimensions(); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - argminmax_axis >= 0 && 
argminmax_axis < num_dimensions, - "argminmax_axis must be greater than or equal to 0 and less than (input's rank)."); - return Status{}; -} - -} // namespace - -CLArgMinMaxKernel::CLArgMinMaxKernel() : _input(nullptr), _output(nullptr), _argminmax_axis() {} - -void CLArgMinMaxKernel::configure(const ICLTensor *input, ICLTensor *output, - const uint32_t argminmax_axis, ArgOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), argminmax_axis)); - - _input = input; - _output = output; - _argminmax_axis = argminmax_axis; - - std::unique_ptr<ITensorInfo> output_info = output->info()->clone(); - output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), argminmax_axis)); - - // Construct kernel name for argmax and argmin based on axis - std::string kernel_name = "arg_op"; - int op_code = 0; - if (op == ArgOperation::MAX) - { - op_code = 1; - } - else if (op == ArgOperation::MIN) - { - op_code = 2; - } - else - throw std::runtime_error("Operation not supported, yet"); - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(output_info->data_type())); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2))); - build_opts.emplace("-DOP_CODE=" + support::cpp11::to_string(op_code)); - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output_info, Steps()); - - Coordinates coord; - coord.set_num_dimensions(output_info->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -Status CLArgMinMaxKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const uint32_t argminmax_axis, ArgOperation op) -{ - 
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, argminmax_axis, op)); - - return Status{}; -} - -void CLArgMinMaxKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &shape_in = _input->info()->tensor_shape(); - - unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters - - _kernel.setArg<cl_int>(idx++, _argminmax_axis); - _kernel.setArg<cl_int>(idx++, shape_in[_argminmax_axis]); - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup input slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - // Copy output's shape in order to use for recovering at end of this method - const TensorShape shape_out = _output->info()->tensor_shape(); - _output->info()->set_tensor_shape(inferOutputShape(shape_in, _argminmax_axis)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); - - // Recover output's shape of output tensor - _output->info()->set_tensor_shape(shape_out); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLArithmeticSubtractionExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLArithmeticSubtractionExKernel.cpp deleted file mode 100644 index 1c505b4d5..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLArithmeticSubtractionExKernel.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, ConvertPolicy policy) -{ - ARM_COMPUTE_UNUSED(policy); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - - const TensorShape &out_shape = - TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - output->data_type() == DataType::U8 && - (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8), - "Output can only be U8 if 
both inputs are U8"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, - ITensorInfo *output) -{ - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output, out_shape); - - if (input1->data_type() == DataType::S16 || input2->data_type() == DataType::S16) - { - set_format_if_unknown(*output, Format::S16); - } - else if (input1->data_type() == DataType::F16 && input2->data_type() == DataType::F16) - { - set_format_if_unknown(*output, Format::F16); - } - else if (input1->data_type() == DataType::F32 || input2->data_type() == DataType::F32) - { - set_format_if_unknown(*output, Format::F32); - } - } - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input1); - Window win_input2 = win.broadcast_if_dimension_le_one(*input2); - - AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - Status err = (window_changed) - ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLArithmeticSubtractionExKernel::CLArithmeticSubtractionExKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void CLArithmeticSubtractionExKernel::configure(const ICLTensor *input1, const ICLTensor *input2, - ICLTensor *output, ConvertPolicy policy) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input1->info(), input2->info(), output->info(), policy)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - - _input1 = input1; - _input2 = input2; - _output = output; - - const bool has_float_out = is_data_type_float(output->info()->data_type()); - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace((policy == ConvertPolicy::WRAP || has_float_out) ? 
"-DWRAP" : "-DSATURATE"); - build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("arithmetic_sub_ex", build_opts)); - - ICLKernel::configure_internal(win_config.second); -} - -Status CLArithmeticSubtractionExKernel::validate(const ITensorInfo *input1, - const ITensorInfo *input2, - const ITensorInfo *output, ConvertPolicy policy) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, policy)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), - input2->clone().get(), - output->clone().get()) - .first); - - return Status{}; -} - -void CLArithmeticSubtractionExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input1->info()->tensor_shape(); - const TensorShape &in_shape2 = _input2->info()->tensor_shape(); - const TensorShape &out_shape = _output->info()->tensor_shape(); - - bool can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? 
in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - - add_3D_tensor_argument(idx, _input1, slice_input1); - add_3D_tensor_argument(idx, _input2, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLArithmeticSubtractionExKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); - const unsigned int border = - std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp deleted file mode 100644 index b0016d23c..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const int32_t *block_size) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size[0] >= 1 && block_size[1] >= 1, - "Block size should be greater than or equal to 1."); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) == output->dimension(2), - "Input Depth should be equal to Output Depth"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - output->dimension(3) * block_size[0] * block_size[1] == input->dimension(3), - "Input batch should be equal to (output batch * block size[0] *block size[1])"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(output->dimension(0) % block_size[1]) && - !(output->dimension(1) % block_size[0]), - "Output height and width should be divisible by block size[0] " - "and block_size[1] respectively"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) == input->dimension(0) * block_size[1]) && - (output->dimension(1) == input->dimension(1) * block_size[0]), - "Output height and width should be 
equal to " - "input_height*blocksize[0] and input_width*blocksize[1] " - "respectively"); - - return Status{}; -} - -} // namespace - -CLBatchToSpaceNDKernel::CLBatchToSpaceNDKernel() : _input(nullptr), _output(nullptr) {} - -void CLBatchToSpaceNDKernel::configure(const ICLTensor *input, ICLTensor *output, - const int32_t *block_size) -{ - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size)); - - _input = input; - _output = output; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DBLOCK_SIZE0=" + support::cpp11::to_string(block_size[0])); - build_opts.emplace("-DBLOCK_SIZE1=" + support::cpp11::to_string(block_size[1])); - build_opts.emplace("-DBATCH_OUT=" + support::cpp11::to_string(output->info()->dimension(3))); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("batch_to_space_nd", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLBatchToSpaceNDKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_out(slice_in); - slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimZ, 
Window::Dimension(0, 0, 0)); - slice_out.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_out); - add_4D_tensor_argument(idx, _output, slice_in); - enqueue(queue, *this, slice_in); - } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp deleted file mode 100644 index 3d2f2c702..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output) -{ - const TensorShape &out_shape = - TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, - DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - return Status{}; -} -} // namespace - -CLBinaryLogicalOpKernel::CLBinaryLogicalOpKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor *input2, - ICLTensor *output, BinaryLogicalOperation op) -{ - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_parameters(input1->info(), input2->info(), output->info())); - - _input1 = input1; - _input2 = input2; - _output = output; - - // Create kernel - std::string kernel_name = "binary_logical_op"; - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type()))); - - int op_code = 0; - 
switch (op) - { - case BinaryLogicalOperation::AND: - op_code = 1; - break; - case BinaryLogicalOperation::OR: - op_code = 2; - break; - default: - throw std::runtime_error("Operation not supported, yet"); - } - - build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info()); - - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input1->info()); - Window win_input2 = win.broadcast_if_dimension_le_one(*input2->info()); - - AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); -} - -void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input1->info()->tensor_shape(); - const TensorShape &in_shape2 = _input2->info()->tensor_shape(); - const TensorShape &out_shape = _output->info()->tensor_shape(); - - bool 
can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input1, slice_input1); - add_3D_tensor_argument(idx, _input2, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLBinaryLogicalOpKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); - const unsigned int border = - std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp deleted file mode 100644 index bf7ebae3f..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2018 Samsung 
Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLCastKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -CLCastKernel::CLCastKernel() : _input(nullptr), _output(nullptr) {} - -void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - // Create kernel - if 
(is_data_type_quantized_asymmetric(input->info()->data_type())) - { - const float scale_in = input->info()->quantization_info().scale; - const int offset_in = input->info()->quantization_info().offset; - build_opts.emplace("-DSCALE=" + float_to_string_with_full_precision(scale_in)); - build_opts.emplace("-DOFFSET=" + support::cpp11::to_string(offset_in)); - - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("cast_qasymm_in", build_opts)); - } - else if (is_data_type_quantized_asymmetric(output->info()->data_type())) - { - const float scale_in = output->info()->quantization_info().scale; - const int offset_in = output->info()->quantization_info().offset; - build_opts.emplace("-DSCALE=" + float_to_string_with_full_precision(scale_in)); - build_opts.emplace("-DOFFSET=" + support::cpp11::to_string(offset_in)); - - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("cast_qasymm_out", build_opts)); - } - else - { - _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("cast", build_opts)); - } - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure_internal(win); -} - -void CLCastKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - 
add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - } while (collapsed.slide_window_slice_3D(slice)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp deleted file mode 100644 index 5af5b16ea..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLComparisonOpKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output) -{ - const TensorShape &out_shape = - TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::U16, - DataType::S16, DataType::F16, DataType::S32, - DataType::F32, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::U16, - DataType::S16, DataType::F16, DataType::S32, - DataType::F32, DataType::QASYMM8); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - return Status{}; -} -} // namespace - -CLComparisonOpKernel::CLComparisonOpKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void CLComparisonOpKernel::configure(const ICLTensor *input1, const ICLTensor *input2, - ICLTensor *output, const ComparisonOperation &op) -{ - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info())); - - _input1 = input1; - _input2 = input2; - _output = output; - - // Create kernel - std::string kernel_name = "comparison_op"; - int op_code = 0; - - switch (op) - { - case ComparisonOperation::EQUAL: - op_code = 1; - 
break; - case ComparisonOperation::NOT_EQUAL: - op_code = 2; - break; - default: - throw std::runtime_error(" Operation not supported, yet"); - } - - std::set<std::string> build_opts; - build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code))); - build_opts.emplace(("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input1->info()->data_type()))); - build_opts.emplace( - ("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - if (is_data_type_quantized_asymmetric(input1->info()->data_type()) && - ((input1->info()->quantization_info().offset != input2->info()->quantization_info().offset) || - (input1->info()->quantization_info().scale != input2->info()->quantization_info().scale))) - { - build_opts.emplace("-DOFFSET_IN1=" + - support::cpp11::to_string(input1->info()->quantization_info().offset)); - build_opts.emplace("-DOFFSET_IN2=" + - support::cpp11::to_string(input2->info()->quantization_info().offset)); - build_opts.emplace("-DSCALE_IN1=" + - support::cpp11::to_string(input1->info()->quantization_info().scale)); - build_opts.emplace("-DSCALE_IN2=" + - support::cpp11::to_string(input2->info()->quantization_info().scale)); - kernel_name += "_qasymm8"; - } - - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info()); - - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output->info(), out_shape); - - if (input1->info()->data_type() == DataType::S16 || - input2->info()->data_type() == DataType::S16) - { - set_format_if_unknown(*output->info(), Format::S16); - } - else if (input1->info()->data_type() == 
DataType::F16 && - input2->info()->data_type() == DataType::F16) - { - set_format_if_unknown(*output->info(), Format::F16); - } - else if (input1->info()->data_type() == DataType::F32 || - input2->info()->data_type() == DataType::F32) - { - set_format_if_unknown(*output->info(), Format::F32); - } - } - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input1->info()); - Window win_input2 = win.broadcast_if_dimension_le_one(*input2->info()); - - AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); -} - -void CLComparisonOpKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input1->info()->tensor_shape(); - const TensorShape &in_shape2 = _input2->info()->tensor_shape(); - const TensorShape &out_shape = _output->info()->tensor_shape(); - - bool can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? 
window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input1, slice_input1); - add_3D_tensor_argument(idx, _input2, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLComparisonOpKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); - const unsigned int border = - std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp deleted file mode 100644 index c386e3312..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const int32_t block_size) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size >= 1, - "Block size should be greater than or equal to 1."); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(0) == input->dimension(0) * block_size, - "Output width should be equal to (Input width * block size)"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(1) == input->dimension(1) * block_size, - "Output height should be equal to (Input height * block size)"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) % (block_size * block_size) == 0, - "Input depth should be divisible by (block size * block size)"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - output->dimension(2) == input->dimension(2) / (block_size * block_size), - "Output depth should be equal to (Input depth / (block size * block size))"); - - return Status{}; -} -} // namespace - -CLDepthToSpaceKernel::CLDepthToSpaceKernel() : _input(nullptr), 
_output(nullptr) -{ - // DO NOTHING -} - -void CLDepthToSpaceKernel::configure(const ICLTensor *input, ICLTensor *output, - const int32_t block_size) -{ - - _input = input; - _output = output; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size)); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("depth_to_space", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLDepthToSpaceKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup input slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp deleted file mode 100644 index 
0862b78bf..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - input_access.set_valid_region(win, output->valid_region()); - - Status err = (window_changed) - ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLEmbeddingLookupKernel::CLEmbeddingLookupKernel() - : _input(nullptr), _output(nullptr), _lookups(nullptr) -{ -} - -Status CLEmbeddingLookupKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *lookups) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1); - - return Status{}; -} - -void CLEmbeddingLookupKernel::configure(const ICLTensor *input, ICLTensor *output, - const ICLTensor *lookups) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), lookups->info())); - - _input = input; - _output = output; - _lookups = lookups; - - // Set kernel build options - std::stringstream kernel_name; - std::set<std::string> build_opts; - kernel_name << "embedding_lookup"; - - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions())); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - 
ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); -} - -void CLEmbeddingLookupKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - Window win_lookup; - win_lookup.set(Window::DimX, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_in); - add_1D_tensor_argument(idx, _lookups, win_lookup); - - enqueue(queue, *this, slice_in); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_1D(win_lookup)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLExpKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLExpKernel.cpp deleted file mode 100644 index b1ee21bdc..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLExpKernel.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLExpKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -CLExpKernel::CLExpKernel() : _input(nullptr), _output(nullptr) {} - -void CLExpKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Auto initialize output - auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), - input->info()->quantization_info()); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 4; - - // Create kernel - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("exp_layer", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure_internal(win); -} - -void CLExpKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window collapsed = 
window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - } while (collapsed.slide_window_slice_3D(slice)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLGatherKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLGatherKernel.cpp deleted file mode 100644 index ae2801e2b..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLGatherKernel.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 1; - -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S32, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S32, - DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output); - - return Status{}; -} - -} // namespace - -CLGatherKernel::CLGatherKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr) {} - -void CLGatherKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info())); - - _input1 = input1; - _input2 = input2; - _output = output; - - // Construct kernel name - std::string kernel_name = "gather"; - if (input1->info()->num_dimensions() == 1) - { - kernel_name = "gather_1d"; - } - else if (input1->info()->num_dimensions() == 2) - { - if (_output->info()->num_dimensions() == 1) - { - kernel_name = "gather_1d_out"; - } - } - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - - // Create kernel - _kernel = - 
static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input2->info(), Steps(num_elems_processed_per_iteration)); - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -Status CLGatherKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output)); - - return Status{}; -} - -void CLGatherKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - if (_input1->info()->num_dimensions() == 1) - { - Window slice = window.first_slice_window_1D(); - - unsigned int idx = 0; - add_1D_tensor_argument(idx, _input1, slice); - add_1D_tensor_argument(idx, _input2, slice); - add_1D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - } - else if (_input1->info()->num_dimensions() == 2) - { - Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimY); - Window slice = window.collapse_if_possible(ICLKernel::window(), Window::DimX); - - // Set inputs - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input1, window_collapsed); - add_1D_tensor_argument(idx, _input2, slice); - if (_output->info()->num_dimensions() == 1) - { - add_1D_tensor_argument(idx, _output, slice); - } - else - { - add_2D_tensor_argument(idx, _output, window_collapsed); - } - enqueue(queue, *this, slice); - } -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp deleted file mode 100644 index cd7b21c6d..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp +++ 
/dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - input_access.set_valid_region(win, output->valid_region()); - - Status err = (window_changed) - ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLHashtableLookupKernel::CLHashtableLookupKernel() - : _input(nullptr), _output(nullptr), _lookups(nullptr) -{ -} - -Status CLHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITensorInfo *keys, - const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *hits) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(hits, 1, DataType::U8, DataType::QASYMM8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0, - "Output's shape was not set"); - - ARM_COMPUTE_ERROR_ON(lookups->dimensions(0) == hits->dimensions(0) && - output->dimension(output->num_dimensions() - 1) == lookups->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(keys->num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(hits->num_dimensions() > 1); - - return Status{}; -} - -void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTensor *keys, - const ICLTensor *input, ICLTensor *output, ICLTensor *hits) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), lookups->info())); - - _lookups = lookups; - _keys = keys; - _input = input; - _output = output; - _hits = hits; - - // Make _lookup_indices tensor - _lookup_indices = arm_compute::support::cpp14::make_unique<CLTensor>(); - 
_lookup_indices->allocator()->init( - TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32)); - _lookup_indices->allocator()->allocate(); - - // Set kernel build options - std::stringstream kernel_name; - std::set<std::string> build_opts; - kernel_name << "hashtable_lookup"; - - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions())); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); -} - -void CLHashtableLookupKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const_cast<ICLTensor *>(_lookups)->map(queue); - const_cast<ICLTensor *>(_keys)->map(queue); - _hits->map(queue); - _lookup_indices->map(queue); - - // Set values of hits - const int32_t *lookups_buf = - reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer()); - const int32_t *keys_buf = reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_keys)->buffer()); - uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer()); - int32_t *lookup_indices_buf = reinterpret_cast<int32_t *>(_lookup_indices->buffer()); - - std::map<int32_t, size_t> key_map; - const size_t keys_num = _keys->info()->dimension(0); - for (size_t key_index = 0; key_index < keys_num; key_index++) - { - key_map[keys_buf[key_index]] = 
key_index; - } - - const size_t lookups_num = _lookups->info()->dimension(0); - for (size_t i = 0; i < lookups_num; ++i) - { - const auto lookup_value = lookups_buf[i]; - const auto it = key_map.find(lookup_value); - if (it != key_map.end()) - { -#if defined(DEBUG) - if (it->second >= lookups_num) - ARM_COMPUTE_ERROR("HashTable Lookup: index out of bounds."); -#endif // defined(DEBUG) - lookup_indices_buf[i] = static_cast<int32_t>(it->second); - hits_buf[i] = static_cast<uint8_t>(1); - } - else - { - lookup_indices_buf[i] = -1; - hits_buf[i] = static_cast<uint8_t>(0); - } - } - - const_cast<ICLTensor *>(_lookups)->unmap(queue); - const_cast<ICLTensor *>(_keys)->unmap(queue); - _hits->unmap(queue); - _lookup_indices->unmap(queue); - - Window win = window.collapse(ICLKernel::window(), 2, 4); - - Window win_lookup; - win_lookup.set(Window::DimX, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, win); - add_4D_tensor_argument(idx, _output, win); - add_1D_tensor_argument(idx, _lookup_indices.get(), win_lookup); - - enqueue(queue, *this, win); - } while (window.slide_window_slice_4D(win) && window.slide_window_slice_1D(win_lookup)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp deleted file mode 100644 index 80d99dd3b..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLNegKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16, DataType::S32, - DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16, DataType::S32, - DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(input->info()->tensor_shape(), - output->info()->tensor_shape()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - return Status{}; -} - -} // namespace - -CLNegKernel::CLNegKernel() : _input(nullptr), _output(nullptr) {} - -void CLNegKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Create kernel - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts)); - - // Configure window - Window win 
= calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure_internal(win); -} - -void CLNegKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } while (collapsed.slide_window_slice_3D(slice)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp deleted file mode 100644 index 12bbe910f..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - - // Checks performed when output is configured - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, - NormalizationLayerInfo norm_info) -{ - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, *input->clone()); - - const unsigned int norm_size = norm_info.norm_size(); - bool is_in_map = norm_info.is_in_map(); - - const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0; - const BorderSize border_size = BorderSize(0, border_width); - - const unsigned int num_elems_processed_per_iteration = 4; - const unsigned int num_elems_read_per_iteration = - is_in_map ? 
(num_elems_processed_per_iteration + 2 * (norm_size / 2)) - : num_elems_processed_per_iteration; - - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - - // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside - // the kernel, avoiding padding - AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->valid_region()); - - Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLNormalizationLayerExKernel::CLNormalizationLayerExKernel() - : _input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false) -{ -} - -BorderSize CLNormalizationLayerExKernel::border_size() const { return _border_size; } - -void CLNormalizationLayerExKernel::configure(const ICLTensor *input, ICLTensor *output, - NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), *input->info()->clone()); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), norm_info)); - - _input = input; - _output = output; - - const unsigned int num_elems_processed_per_iteration = 4; - const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D); - - // Set build options - CLBuildOptions build_opts; - build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.add_option( - ("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff()))); - build_opts.add_option(("-DBETA=" + 
float_to_string_with_full_precision(norm_info.beta()))); - build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa()))); - build_opts.add_option( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size()))); - build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2)))); - build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D"); - - // Create kernel - std::string kernel_name = - _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map"; - _kernel = static_cast<cl::Kernel>( - CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); - - // Set config_id for enabling LWS tuning - _config_id = "normalization_layer_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string( - static_cast<std::underlying_type<NormType>::type>(norm_info.type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(norm_info.norm_size()); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -Status CLNormalizationLayerExKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info)); - ARM_COMPUTE_RETURN_ON_ERROR( - validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first); - - return Status{}; -} - -void CLNormalizationLayerExKernel::run(const Window &window, 
cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const int collapsed_dimension = _is_in_map ? Window::DimZ : 4; - Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension); - Window slice = window_collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - } while (window_collapsed.slide_window_slice_3D(slice)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp deleted file mode 100644 index 241f8ae4d..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLPReLUKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_info(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output) -{ - const TensorShape &out_shape = - TensorShape::broadcast_shape(input->tensor_shape(), alpha->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, - DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(alpha, 1, DataType::F16, DataType::F32, - DataType::QASYMM8); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - return Status{}; -} -} // namespace - -CLPReLUKernel::CLPReLUKernel() : _input(nullptr), _alpha(nullptr), _output(nullptr) {} - -void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, alpha); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), alpha->info(), output->info())); - - _input = input; - _alpha = alpha; - _output = output; - - // Create kernel - std::string kernel_name = "prelu"; - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - if 
(is_data_type_quantized_asymmetric(input->info()->data_type())) - { - build_opts.emplace("-DOFF_IN1=" + - support::cpp11::to_string(input->info()->quantization_info().offset)); - build_opts.emplace("-DOFF_IN2=" + - support::cpp11::to_string(alpha->info()->quantization_info().offset)); - build_opts.emplace("-DOFF_OUT=" + - support::cpp11::to_string(output->info()->quantization_info().offset)); - build_opts.emplace("-DSCALE_IN1=" + - support::cpp11::to_string(input->info()->quantization_info().scale)); - build_opts.emplace("-DSCALE_IN2=" + - support::cpp11::to_string(alpha->info()->quantization_info().scale)); - build_opts.emplace("-DSCALE_OUT=" + - support::cpp11::to_string(output->info()->quantization_info().scale)); - kernel_name += "_qasymm8"; - } - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input->info(), *alpha->info()); - - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output->info(), out_shape); - - if (input->info()->data_type() == DataType::F16 && alpha->info()->data_type() == DataType::F16) - { - set_format_if_unknown(*output->info(), Format::F16); - } - else if (input->info()->data_type() == DataType::F32 || - alpha->info()->data_type() == DataType::F32) - { - set_format_if_unknown(*output->info(), Format::F32); - } - } - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input->info()); - Window win_input2 = win.broadcast_if_dimension_le_one(*alpha->info()); - - AccessWindowHorizontal input1_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(alpha->info(), 0, num_elems_processed_per_iteration); - 
AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); -} - -void CLPReLUKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input->info()->tensor_shape(); - const TensorShape &in_shape2 = _alpha->info()->tensor_shape(); - const TensorShape &out_shape = _output->info()->tensor_shape(); - - bool can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? 
in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice_input1); - add_3D_tensor_argument(idx, _alpha, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLPReLUKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input->info()->dimension(0), _alpha->info()->dimension(0)); - const unsigned int border = - std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp deleted file mode 100644 index 99b54c822..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input_info, const ITensorInfo *output_info, - const ITensorInfo *pad_size_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_info, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_info, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(pad_size_info, 1, DataType::S32); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->num_dimensions() > 0 && - input_info->num_dimensions() <= 4, - "Pad kernel supports upto 4-D input tensor"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - input_info->num_dimensions() == output_info->num_dimensions(), - "output tensor should have same number of dimensions as input tensor"); - - if (input_info->data_type() == DataType::QASYMM8) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->quantization_info() != - output_info->quantization_info(), - "The input and output quantization info are different!"); - } - - return Status{}; -} - -} // namespace - -CLPadLayerKernel::CLPadLayerKernel() : _input(nullptr), _output(nullptr), _pad_size(nullptr) {} - -void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, pad_size); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pad_size->info())); - - _input = input; - _output = output; - _pad_size = pad_size; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - 
build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - build_opts.emplace("-DIB=" + support::cpp11::to_string(input->info()->dimension(3))); - build_opts.emplace("-DIW=" + support::cpp11::to_string(input->info()->dimension(0))); - build_opts.emplace("-DIH=" + support::cpp11::to_string(input->info()->dimension(1))); - build_opts.emplace("-DID=" + support::cpp11::to_string(input->info()->dimension(2))); - if (input->info()->data_type() == DataType::QASYMM8) - { - build_opts.emplace("-DZERO_VALUE=" + - support::cpp11::to_string(input->info()->quantization_info().offset)); - } - else - { - build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(0)); - } - - // Create kernel - _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("pad", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - _pad_size->map(queue); - - // Padding values only for up, top, left and front are required based on the rank of tensor - int rank = _pad_size->info()->dimension(1); - - auto pad_batch_up = - (rank == 4) ? *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 0})) : 0; - auto pad_height_top = - (rank >= 2) - ? *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, (rank == 2) ? 0 : 1})) - : 0; - auto pad_width_left = (rank >= 1) - ? *reinterpret_cast<const int32_t *>( - _pad_size->ptr_to_element({0, (rank == 4) ? 2 : rank - 1})) - : 0; - auto pad_depth_front = - (rank >= 3) - ? 
*reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, (rank == 3) ? 0 : 3})) - : 0; - - _pad_size->unmap(queue); - - // Pad_values which needs to be passed - const cl_int4 paddingValues = { - {static_cast<cl_int>(pad_width_left), static_cast<cl_int>(pad_height_top), - static_cast<cl_int>(pad_depth_front), static_cast<cl_int>(pad_batch_up)}}; - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - _kernel.setArg<cl_int4>(idx++, paddingValues); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp deleted file mode 100644 index aa094761c..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" - -using namespace arm_compute; - -namespace -{ -TensorShape get_output_shape(const ITensorInfo *input, const PermutationVector &perm) -{ - TensorShape output_shape = input->tensor_shape(); - permute(output_shape, perm); - return output_shape; -} - -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - - const TensorShape output_shape = - misc::shape_calculator::compute_permutation_output_shape(*input, perm); - - // Validate configured output - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - return Status{}; -} -} // namespace - -CLPermuteExKernel::CLPermuteExKernel() : _input(nullptr), _output(nullptr), _perm() {} - -void CLPermuteExKernel::configure(const ICLTensor *input, ICLTensor *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), perm)); - - _input = input; - _output = output; - _perm = perm; - - const TensorShape output_shape = get_output_shape(input->info(), perm); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); - - // Create kernel - std::set<std::string> 
build_opts; - - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDEPTH_IN=" + support::cpp11::to_string(input->info()->dimension(2))); - - // New positions of batch(D), height(H), width(w) and channel(C) based on permutation vector - build_opts.emplace("-DP1=" + support::cpp11::to_string(perm[0])); - build_opts.emplace("-DP2=" + support::cpp11::to_string(perm[1])); - build_opts.emplace("-DP3=" + support::cpp11::to_string(perm[2])); - build_opts.emplace("-DP4=" + support::cpp11::to_string(perm[3])); - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("permute_generic", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - - // The CLPermute doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -Status CLPermuteExKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, perm)); - - return Status{}; -} - -void CLPermuteExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_out(slice_in); - slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_out.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, 
_input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_in); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp deleted file mode 100644 index b985aa737..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - ARM_COMPUTE_UNUSED(overflow_policy); - ARM_COMPUTE_UNUSED(rounding_policy); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative."); - - const TensorShape &out_shape = - TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - output->data_type() == DataType::U8 && - (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8), - "Output can only be U8 if both inputs are U8"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, - ITensorInfo *output) -{ - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2); - const 
TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output, out_shape); - - if (input1->data_type() == DataType::S16 || input2->data_type() == DataType::S16) - { - set_format_if_unknown(*output, Format::S16); - } - else if (input1->data_type() == DataType::F32 || input2->data_type() == DataType::F32) - { - set_format_if_unknown(*output, Format::F32); - } - } - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input1); - Window win_input2 = win.broadcast_if_dimension_le_one(*input2); - - AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - Status err = (window_changed) - ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLPixelWiseDivisionKernel::CLPixelWiseDivisionKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void CLPixelWiseDivisionKernel::configure(const ICLTensor *input1, const ICLTensor *input2, - ICLTensor *output, float scale, - ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), - scale, overflow_policy, rounding_policy)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - - _input1 = input1; - _input2 = input2; - _output = output; - - int scale_int = -1; - // Extract sign, exponent and mantissa - int exponent = 0; - float normalized_mantissa = std::frexp(scale, &exponent); - // Use int scaling if factor is equal to 1/2^n for 0 <= n <= 15 - // frexp returns 0.5 as mantissa which means that the exponent will be in the range of -1 <= e <= - // 14 - // Moreover, it will be negative as we deal with 1/2^n - if ((normalized_mantissa == 0.5f) && (-14 <= exponent) && (exponent <= 1)) - { - // Store the positive exponent. We know that we compute 1/2^n - // Additionally we need to subtract 1 to compensate that frexp used a mantissa of 0.5 - scale_int = std::abs(exponent - 1); - } - - std::string data_type; - std::string compute_type; - // Check if it has float inputs and output - if (is_data_type_float(input1->info()->data_type()) || - is_data_type_float(input2->info()->data_type())) - { - scale_int = -1; - compute_type = (input1->info()->data_type() == DataType::F32 || - input2->info()->data_type() == DataType::F32) - ? 
"float" - : "half"; - data_type = "DATA_TYPE_FLOAT"; - } - else - { - if (input1->info()->data_type() == DataType::S16 || - input2->info()->data_type() == DataType::S16) - { - compute_type = "int"; - } - else - { - compute_type = "ushort"; - } - data_type = "DATA_TYPE_INT"; - } - - // Construct kernel name - std::string kernel_name = "pixelwise_div"; - kernel_name += (scale_int >= 0) ? "_int" : "_float"; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace( - (overflow_policy == ConvertPolicy::WRAP || is_data_type_float(output->info()->data_type())) - ? "-DWRAP" - : "-DSATURATE"); - build_opts.emplace((rounding_policy == RoundingPolicy::TO_ZERO) ? "-DROUND=_rtz" - : "-DROUND=_rte"); - build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_RES=" + compute_type); - build_opts.emplace("-D" + data_type); - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Set scale argument - unsigned int idx = 3 * num_arguments_per_3D_tensor(); // Skip the inputs and output parameters - - if (scale_int >= 0) - { - _kernel.setArg(idx++, scale_int); - } - else - { - _kernel.setArg(idx++, scale); - } - - ICLKernel::configure_internal(win_config.second); -} - -Status CLPixelWiseDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR( - validate_arguments(input1, input2, output, scale, overflow_policy, rounding_policy)); - 
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), - input2->clone().get(), - output->clone().get()) - .first); - - return Status{}; -} - -void CLPixelWiseDivisionKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input1->info()->tensor_shape(); - const TensorShape &in_shape2 = _input2->info()->tensor_shape(); - const TensorShape &out_shape = _output->info()->tensor_shape(); - - bool can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); ++d) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? 
in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input1, slice_input1); - add_3D_tensor_argument(idx, _input2, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLPixelWiseDivisionKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); - const unsigned int border = - std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp deleted file mode 100644 index f581780e1..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; -namespace -{ -// NOTE This is necessary because it is not guaranteed that the axis positions of input and output -// are the same. -const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t axis) -{ - TensorShape out_shape{input_shape}; - - out_shape.set(axis, 1); - - return out_shape; -} -} // namespace - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis, - ReduceOperation op) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32, DataType::S32); - if (op == ReduceOperation::MEAN || op == ReduceOperation::SUM) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8, - "Not support QASYMM8, yet"); - } - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0, - "Inputs are not broadcast compatible"); - - const auto num_dimensions = input->tensor_shape().num_dimensions(); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - axis >= 0 && axis < num_dimensions, - "axis must be greater than or equal to 0 and less than (input's rank)."); - - const TensorShape output_shape = inferOutputShape(input->tensor_shape(), axis); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(), - "output shape's size does not match axis"); - - return Status{}; -} -} // namespace - -CLReduceOperationKernel::CLReduceOperationKernel() : _input(nullptr), _output(nullptr), _axis() {} - -void 
CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *output, - const uint32_t axis, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op)); - - _input = input; - _output = output; - _axis = axis; - - std::unique_ptr<ITensorInfo> output_info = output->info()->clone(); - output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), axis)); - - // Construct kernel name - std::string kernel_name; - int op_code = 0; - if (op == ReduceOperation::MAX) - { - kernel_name = "reduce_min_max"; - op_code = 1; - } - else if (op == ReduceOperation::MIN) - { - kernel_name = "reduce_min_max"; - op_code = 2; - } - else if (op == ReduceOperation::SUM) - { - kernel_name = "reduce_sum_mean"; - op_code = 3; - } - else if (op == ReduceOperation::MEAN) - { - kernel_name = "reduce_sum_mean"; - op_code = 4; - } - else - throw std::runtime_error("Operation not supported, yet"); - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(output_info->data_type())); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2))); - build_opts.emplace("-DOP_CODE=" + support::cpp11::to_string(op_code)); - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output_info, Steps()); - - Coordinates coord; - coord.set_num_dimensions(output_info->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -Status CLReduceOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const uint32_t axis, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - 
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op)); - - return Status{}; -} - -void CLReduceOperationKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &shape_in = _input->info()->tensor_shape(); - - unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters - - _kernel.setArg<cl_int>(idx++, _axis); - _kernel.setArg<cl_int>(idx++, shape_in[_axis]); - - // Support dimensions up to 4 - Window slice_out = window.collapse(ICLKernel::window(), 2, 4); - - // Setup input slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - // Copy output's shape in order to use for recovering at end of this method - // TODO Remove changing and recovering output's shape if it is guaranteed that the axis positions - // of input and output are the same - const TensorShape shape_out = _output->info()->tensor_shape(); - _output->info()->set_tensor_shape(inferOutputShape(shape_in, _axis)); - - idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - - // Recover output's shape of output tensor - _output->info()->set_tensor_shape(shape_out); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp deleted file mode 100644 index 6b0697e89..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_size, - const ITensorInfo *padding_size, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::F16, DataType::S32, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_size, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(padding_size, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::F16, DataType::S32, - DataType::F32); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() != output->num_dimensions(), - "The number of dimensions of input should be equal to output"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() != output->data_layout(), - "The input and output layouts are different!"); - - // TODO Support other cases - if (input->num_dimensions() == 4 && input->data_layout() == DataLayout::NCHW) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) 
!= output->dimension(2), - "Input Depth should be equal to Output Depth"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size->dimension(0) != 2 || - padding_size->dimension(1) != 2, - "Only 2-dimensional spatial block's size was wrong"); - } - else if (input->num_dimensions() == 4 && input->data_layout() == DataLayout::NHWC) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(0) != output->dimension(0), - "Input Depth should be equal to Output Depth"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size->dimension(0) != 2 || - padding_size->dimension(1) != 2, - "Only 2-dimensional spatial block's size was wrong"); - } - else - { - ARM_COMPUTE_RETURN_ERROR_MSG("CLSpaceToBatchNDKernel supports only 4-dimensional input"); - } - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() < 2 && input->num_dimensions() > 4, - "CLSpaceToBatchNDKernel supports dimensions up to 4"); - - if (input->data_type() == DataType::QASYMM8) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->quantization_info() != output->quantization_info(), - "The input and output quantization info are different!"); - } - - return Status{}; -} - -} // namespace - -CLSpaceToBatchNDKernel::CLSpaceToBatchNDKernel() : _input(nullptr), _output(nullptr) {} - -void CLSpaceToBatchNDKernel::configure(const ICLTensor *input, const ICLTensor *block_size, - const ICLTensor *padding_size, ICLTensor *output) -{ - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), block_size->info(), padding_size->info(), output->info())); - - _input = input; - _block_size = block_size; - _padding_size = padding_size; - _output = output; - - // Set kernel build options - // TODO Support other cases - std::string kernel_name = "space_to_batch_4d"; - std::set<std::string> build_opts; - Window win; - - if (input->info()->data_layout() == DataLayout::NCHW) - { - kernel_name += "_nchw"; - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - 
build_opts.emplace("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(1))); - build_opts.emplace("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(0))); - - win = calculate_max_window(*output->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - } - else if (input->info()->data_layout() == DataLayout::NHWC) - { - kernel_name += "_nhwc"; - build_opts.emplace("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - build_opts.emplace("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(2))); - build_opts.emplace("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(1))); - build_opts.emplace("-DVEC_SIZE=" + - support::cpp11::to_string(num_elems_processed_per_iteration)); - - win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - input_access.set_valid_region(win, output->info()->valid_region()); - - if (window_changed) - { - ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!"); - } - } - else - { - ARM_COMPUTE_ERROR("Unsupported layout"); - } - - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(3))); - if (input->info()->data_type() == DataType::QASYMM8) - { - build_opts.emplace("-DZERO_VALUE=" + - support::cpp11::to_string(input->info()->quantization_info().offset)); - } - else - { - build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(0)); - } - - // Create kernel - _kernel = - 
static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Configure kernel window - ICLKernel::configure_internal(win); -} - -void CLSpaceToBatchNDKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - -#if defined(DEBUG) - const_cast<ICLTensor *>(_block_size)->map(queue); - const_cast<ICLTensor *>(_padding_size)->map(queue); - - const size_t num_dimensions = _input->info()->num_dimensions(); - const size_t num_spacial_dimensions = _block_size->info()->dimension(0); - int32_t batch_size = _input->info()->dimension(num_dimensions - 1); - for (size_t i = 0; i < num_spacial_dimensions; ++i) - { - const int32_t block_size = *reinterpret_cast<int32_t *>(_block_size->ptr_to_element({i})); - const int32_t padding_size_pre = - *reinterpret_cast<int32_t *>(_padding_size->ptr_to_element({0, i})); - const int32_t padding_size_post = - *reinterpret_cast<int32_t *>(_padding_size->ptr_to_element({1, i})); - - ARM_COMPUTE_ERROR_ON_MSG(block_size < 1, "Block size should be greater than or equal to 1"); - ARM_COMPUTE_ERROR_ON_MSG(padding_size_pre < 0 && padding_size_post < 0, - "Padding size should be greater than or equal to 0"); - - if (num_dimensions == 4 && _input->info()->data_layout() == DataLayout::NCHW) - { - ARM_COMPUTE_ERROR_ON_MSG( - _output->info()->dimension(i) != - (_input->info()->dimension(i) + padding_size_pre + padding_size_post) / block_size, - "Dimension value of spatial block does not match output's dimension value"); - } - else - { - ARM_COMPUTE_ERROR_ON_MSG( - _output->info()->dimension(num_dimensions - num_spacial_dimensions - 1 + i) != - (_input->info()->dimension(num_dimensions - num_spacial_dimensions - 1 + i) + - padding_size_pre + padding_size_post) / - block_size, - "Dimension value of spatial block does not match output's dimension value"); - } - - batch_size *= block_size; - } - 
ARM_COMPUTE_ERROR_ON_MSG( - _output->info()->dimension(num_dimensions - 1) != batch_size, - "Output batch size should be equal to input batch size * (multiplication of all block size)"); - - const_cast<ICLTensor *>(_block_size)->unmap(queue); - const_cast<ICLTensor *>(_padding_size)->unmap(queue); -#endif // defined(DEBUG) - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - // Set block size window - Window win_block = calculate_max_window(*_block_size->info(), Steps()); - - // Set padding size window - Window win_padding = calculate_max_window(*_padding_size->info(), Steps()); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - add_1D_tensor_argument(idx, _block_size, win_block); - add_2D_tensor_argument(idx, _padding_size, win_padding); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp deleted file mode 100644 index 5d6329edc..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const int32_t block_size) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size >= 1, - "Block size should be greater than or equal to 1."); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(3) == output->dimension(3), - "Input batch should be equal to Output batch"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - input->dimension(2) * block_size * block_size == output->dimension(2), - "Output depth should be equal to (input depth * block size *block size)"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(input->dimension(0) % block_size) && - !(input->dimension(1) % block_size), - "Input height and width should be divisible by block size"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) == (input->dimension(0) / block_size)) && - (output->dimension(1) == (input->dimension(1) / block_size)), - "Output height and width should be equal to " - "input_height/blocksize and input_width/blocksize respectively"); - - return 
Status{}; -} - -} // namespace - -CLSpaceToDepthKernel::CLSpaceToDepthKernel() : _input(nullptr), _output(nullptr) {} - -void CLSpaceToDepthKernel::configure(const ICLTensor *input, ICLTensor *output, - const int32_t block_size) -{ - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size)); - - _input = input; - _output = output; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size)); - build_opts.emplace("-DDEPTH_IN=" + support::cpp11::to_string(input->info()->dimension(2))); - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("space_to_depth", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLSpaceToDepthKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_out(slice_in); - slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_out.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_in); - } while (window.slide_window_slice_4D(slice_in) && 
window.slide_window_slice_4D(slice_out)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLSquaredDifferenceKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLSquaredDifferenceKernel.cpp deleted file mode 100644 index 260bc39f1..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLSquaredDifferenceKernel.cpp +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) -{ - const TensorShape &out_shape = - TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::F16, DataType::F32); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - return Status{}; -} -} // namespace - -CLSquaredDifferenceKernel::CLSquaredDifferenceKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void CLSquaredDifferenceKernel::configure(const ICLTensor *input1, const ICLTensor *input2, - ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output); - ARM_COMPUTE_ERROR_THROW_ON(validate(input1->info(), input2->info(), output->info())); - - _input1 = input1; - _input2 = input2; - _output = output; - - // Create kernel - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type()))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - _kernel = 
static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("squared_difference", build_opts)); - - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info()); - - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output->info(), out_shape); - - if (input1->info()->data_type() == DataType::F16 && - input2->info()->data_type() == DataType::F16) - { - set_format_if_unknown(*output->info(), Format::F16); - } - else if (input1->info()->data_type() == DataType::F32 || - input2->info()->data_type() == DataType::F32) - { - set_format_if_unknown(*output->info(), Format::F32); - } - } - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input1->info()); - Window win_input2 = win.broadcast_if_dimension_le_one(*input2->info()); - - AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); -} - -void CLSquaredDifferenceKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input1->info()->tensor_shape(); - const TensorShape &in_shape2 = _input2->info()->tensor_shape(); - const TensorShape &out_shape = 
_output->info()->tensor_shape(); - - bool can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input1, slice_input1); - add_3D_tensor_argument(idx, _input2, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLSquaredDifferenceKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); - const unsigned int border = - std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp deleted file mode 100644 index 48146a43a..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp 
+++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/TensorInfo.h" - -using namespace arm_compute; - -CLStridedSliceExKernel::CLStridedSliceExKernel() - : _input(nullptr), _output(nullptr), _beginData(nullptr), _endData(nullptr), - _stridesData(nullptr), _beginMask(0), _endMask(0), _shrinkAxisMask(0) -{ -} - -Status CLStridedSliceExKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *begin, const ITensorInfo *end, - const ITensorInfo *strides, int32_t beginMask, - int32_t endMask, int32_t shrinkAxisMask) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, begin, end, strides); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(begin, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(end, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(strides, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, 
output); - - ARM_COMPUTE_ERROR_ON(begin->num_dimensions() != 1 || begin->dimension(0) > 4); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(begin->tensor_shape(), end->tensor_shape(), - strides->tensor_shape()); - - return Status{}; -} - -// Return the index for the first element along that axis. This index will be a -// positive integer between [0, axisSize - 1] that can be used to index -// directly into the data. -inline int32_t StartForAxis(int32_t beginMask, int32_t begin, int32_t stride, - const TensorShape &inputShape, int32_t axis) -{ - // Begin with the specified index - int32_t start = begin; - - // beginMask override - if (beginMask & 1 << axis) - { - if (stride > 0) - { - // Forward iteration - use the first element. These values will get - // clamped below (Note: We could have set them to 0 and axisSize-1, but - // use lowest() and max() to maintain symmetry with StopForAxis()) - start = std::numeric_limits<int32_t>::lowest(); - } - else - { - // Backward iteration - use the last element. - start = std::numeric_limits<int32_t>::max(); - } - } - - // Handle negative indices - int32_t axisSize = inputShape[axis]; - if (start < 0) - { - start += axisSize; - } - - // Clamping - start = arm_compute::utility::clamp(start, 0, axisSize - 1); - - return start; -} - -// Return the "real" index for the end of iteration along that axis. This is an -// "end" in the traditional C sense, in that it points to one past the last -// element. ie. So if you were iterating through all elements of a 1D array of -// size 4, this function would return 4 as the stop, because it is one past the -// "real" indices of 0, 1, 2 & 3. -inline int32_t StopForAxis(int32_t endMask, int32_t end, int32_t stride, - const TensorShape &inputShape, int32_t axis) -{ - // Begin with the specified index - int32_t stop = end; - - // endMask override - if (endMask & (1 << axis)) - { - if (stride > 0) - { - // Forward iteration - use the last element. 
These values will get - // clamped below - stop = std::numeric_limits<int32_t>::max(); - } - else - { - // Backward iteration - use the first element. - stop = std::numeric_limits<int32_t>::lowest(); - } - } - - // Handle negative indices - int32_t axisSize = inputShape[axis]; - if (stop < 0) - { - stop += axisSize; - } - - // Clamping - // Because the end index points one past the last element, we need slightly - // different clamping ranges depending on the direction. - if (stride > 0) - { - // Forward iteration - stop = arm_compute::utility::clamp(stop, 0, axisSize); - } - else - { - // Backward iteration - stop = arm_compute::utility::clamp(stop, -1, axisSize - 1); - } - - return stop; -} - -inline int32_t getOutDim(int32_t start, int32_t stop, int32_t stride) -{ - int32_t ret = 0; - if (stride > 0) - { - ret = ((stop - start - 1) / stride) + 1; - } - else - { - ret = ((stop - start + 1) / stride) + 1; - } - ARM_COMPUTE_ERROR_ON_MSG(ret < 0, "The dimension must be the natural number"); - return ret; -} - -void CLStridedSliceExKernel::configure(const ICLTensor *input, ICLTensor *output, - ICLTensor *beginData, ICLTensor *endData, - ICLTensor *stridesData, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), beginData->info(), - endData->info(), stridesData->info(), beginMask, endMask, - shrinkAxisMask)); - - _input = input; - _output = output; - _beginData = beginData; - _endData = endData; - _stridesData = stridesData; - _beginMask = beginMask; - _endMask = endMask; - _shrinkAxisMask = shrinkAxisMask; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DELEMENT_DATA_TYPE=" + - get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("strided_slice_ex", 
build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - ICLKernel::configure_internal(win); -} - -void CLStridedSliceExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _beginData->map(queue); - _endData->map(queue); - _stridesData->map(queue); - - std::vector<int32_t> starts; - std::vector<int32_t> strides; - - for (uint32_t n = 0; n < _beginData->info()->tensor_shape().total_size(); ++n) - { - const TensorShape shape = _input->info()->tensor_shape(); - starts.emplace_back( - StartForAxis(_beginMask, reinterpret_cast<int32_t *>(_beginData->buffer())[n], - reinterpret_cast<int32_t *>(_stridesData->buffer())[n], shape, n)); - - strides.emplace_back(reinterpret_cast<int32_t *>(_stridesData->buffer())[n]); - } - - for (uint32_t n = _beginData->info()->tensor_shape().total_size(); n < 4; n++) - { - starts.emplace_back(0); - strides.emplace_back(1); - } - // TODO: Apply shrinkAxisMask - - _beginData->unmap(queue); - _stridesData->unmap(queue); - _endData->unmap(queue); - - unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters - const cl_int4 startsArg = {{ - static_cast<cl_int>(starts[0]), static_cast<cl_int>(starts[1]), - static_cast<cl_int>(starts[2]), static_cast<cl_int>(starts[3]), - }}; - _kernel.setArg<cl_int4>(idx++, startsArg); - - const cl_int4 stridesArg = {{ - static_cast<cl_int>(strides[0]), static_cast<cl_int>(strides[1]), - static_cast<cl_int>(strides[2]), static_cast<cl_int>(strides[3]), - }}; - _kernel.setArg<cl_int4>(idx++, stridesArg); - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, 
Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp deleted file mode 100644 index 073c2f7bb..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -namespace arm_compute -{ -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2Single::CLTopKV2Single() : _input(nullptr), _topk_values(nullptr), _topk_indices(nullptr) {} - -void CLTopKV2Single::configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices, - cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr && indices == nullptr); - ARM_COMPUTE_ERROR_ON(topk_values == nullptr && topk_indices == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - _input = input; - _topk_values = topk_values; - _topk_indices = topk_indices; - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("topkv2_quicksort", build_opts)); - - unsigned int idx = 3 * num_arguments_per_1D_tensor(); - _kernel.setArg(idx++, *indices); - _kernel.setArg(idx++, *temp_stack); - _kernel.setArg<cl_int>(idx++, k); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, 1, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2Single::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - add_1D_tensor_argument(idx, _input, window); - add_1D_tensor_argument(idx, _topk_values, window); - add_1D_tensor_argument(idx, _topk_indices, window); - - enqueue(queue, *this, window); -} - -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2Init::CLTopKV2Init() : _input(nullptr) {} - -void CLTopKV2Init::configure(ICLTensor *input, cl::Buffer *in_key_buf, 
cl::Buffer *in_ind_buf, - int n) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr && in_key_buf == nullptr); - ARM_COMPUTE_ERROR_ON(in_ind_buf == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - _input = input; - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("topkv2_init", build_opts)); - - unsigned int idx = num_arguments_per_1D_tensor(); - _kernel.setArg(idx++, *in_key_buf); - _kernel.setArg(idx++, *in_ind_buf); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, n, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2Init::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - add_1D_tensor_argument(idx, _input, window); - - enqueue(queue, *this, window); -} - -//////////////////////////////////////////////////////////////////////////////// -// This kernel makes a histogram of radix for each work item. 
-CLRadixSortHistogram::CLRadixSortHistogram() : _pass(0), _in_key_buf(nullptr) {} - -void CLRadixSortHistogram::configure(cl::Buffer *hist_buf, int bits, int n) -{ - ARM_COMPUTE_ERROR_ON(hist_buf == nullptr); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("radixsort_histogram", build_opts)); - - int loc_histo_size = radix * _ITEMS * sizeof(cl_int); - - unsigned int idx = 1; - _kernel.setArg(idx++, *hist_buf); - - idx = 3; - _kernel.setArg(idx++, loc_histo_size, nullptr); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, _GROUPS * _ITEMS, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortHistogram::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _kernel.setArg(0, *_in_key_buf); - _kernel.setArg<cl_int>(2, _pass); - - cl::NDRange lws = cl::NDRange(_ITEMS, 1); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLRadixSortScanHistogram::CLRadixSortScanHistogram() {} - -void CLRadixSortScanHistogram::configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits) -{ - ARM_COMPUTE_ERROR_ON(hist_buf == nullptr && glob_sum_buf == nullptr); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( 
- CLKernelLibraryEx::get().create_kernel("radixsort_scanhistograms", build_opts)); - - int temp_size = - std::max<uint32_t>(_HISTOSPLIT, _ITEMS * _GROUPS * radix / _HISTOSPLIT) * sizeof(cl_uint); - - unsigned int idx = 0; - _kernel.setArg(idx++, *hist_buf); - _kernel.setArg(idx++, temp_size, nullptr); - _kernel.setArg(idx++, *glob_sum_buf); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, radix * _GROUPS * _ITEMS / 2, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortScanHistogram::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - cl::NDRange lws = cl::NDRange(gws_x / _HISTOSPLIT, 1); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLRadixSortGlobalScanHistogram::CLRadixSortGlobalScanHistogram() {} - -void CLRadixSortGlobalScanHistogram::configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, - int bits) -{ - ARM_COMPUTE_ERROR_ON(glob_sum_buf == nullptr && temp_buf == nullptr); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("radixsort_scanhistograms", build_opts)); - - int temp_size = - std::max<uint32_t>(_HISTOSPLIT, _ITEMS * _GROUPS * radix / _HISTOSPLIT) * sizeof(cl_uint); - - unsigned int idx = 0; - _kernel.setArg(idx++, *glob_sum_buf); - _kernel.setArg(idx++, temp_size, nullptr); - _kernel.setArg(idx++, *temp_buf); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, 
_HISTOSPLIT / 2, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortGlobalScanHistogram::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - cl::NDRange lws = cl::NDRange(gws_x, 1); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLRadixSortPasteHistogram::CLRadixSortPasteHistogram() {} - -void CLRadixSortPasteHistogram::configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits) -{ - ARM_COMPUTE_ERROR_ON(hist_buf == nullptr && glob_sum_buf == nullptr); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("radixsort_pastehistograms", build_opts)); - - unsigned int idx = 0; - _kernel.setArg(idx++, *hist_buf); - _kernel.setArg(idx++, *glob_sum_buf); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, radix * _GROUPS * _ITEMS / 2, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortPasteHistogram::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - cl::NDRange lws = cl::NDRange(gws_x / _HISTOSPLIT, 1); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLRadixSortReorder::CLRadixSortReorder() - : _pass(0), 
_in_key_buf(nullptr), _out_key_buf(nullptr), _in_ind_buf(nullptr), - _out_ind_buf(nullptr) -{ -} - -void CLRadixSortReorder::configure(cl::Buffer *hist_buf, int bits, int n) -{ - ARM_COMPUTE_ERROR_ON(hist_buf == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - unsigned int radix = 1 << bits; - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits)); - build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix)); - build_opts.emplace("-DPERMUT=1"); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("radixsort_reorder", build_opts)); - - unsigned int idx = 2; - _kernel.setArg(idx++, *hist_buf); - - idx = 6; - _kernel.setArg(idx++, sizeof(uint) * radix * _ITEMS, nullptr); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, _GROUPS * _ITEMS, 1)); - ICLKernel::configure_internal(win); -} - -void CLRadixSortReorder::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - unsigned int lx = std::max(1U, (gws_x / _HISTOSPLIT)); - cl::NDRange lws = (lx < gws_x) ? 
cl::NDRange(lx, 1) : cl::NDRange(1, 1); - - _kernel.setArg(0, *_in_key_buf); - _kernel.setArg(1, *_out_key_buf); - _kernel.setArg<cl_int>(3, _pass); - _kernel.setArg(4, *_in_ind_buf); - _kernel.setArg(5, *_out_ind_buf); - - enqueue(queue, *this, window, lws); -} - -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2FindFirstNegative::CLTopKV2FindFirstNegative() : _out_key_buf(nullptr) {} - -void CLTopKV2FindFirstNegative::configure(cl::Buffer *first_negative_idx_buf, int n) -{ - ARM_COMPUTE_ERROR_ON(first_negative_idx_buf == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("topkv2_find_first_negative", build_opts)); - - unsigned int idx = 1; - _kernel.setArg(idx++, *first_negative_idx_buf); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, n, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2FindFirstNegative::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - _kernel.setArg(idx++, *_out_key_buf); - - enqueue(queue, *this, window); -} - -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2ReorderNegatives::CLTopKV2ReorderNegatives() - : _in_key_buf(nullptr), _out_key_buf(nullptr), _in_ind_buf(nullptr), _out_ind_buf(nullptr) -{ -} - -void CLTopKV2ReorderNegatives::configure(cl::Buffer *first_negative_idx_buf, int n) -{ - ARM_COMPUTE_ERROR_ON(first_negative_idx_buf == nullptr); - ARM_COMPUTE_ERROR_ON(n == 0); - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("topkv2_reorder_negatives", 
build_opts)); - - unsigned int idx = 4; - _kernel.setArg(idx++, *first_negative_idx_buf); - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, n, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2ReorderNegatives::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - _kernel.setArg(idx++, *_in_key_buf); - _kernel.setArg(idx++, *_out_key_buf); - _kernel.setArg(idx++, *_in_ind_buf); - _kernel.setArg(idx++, *_out_ind_buf); - - enqueue(queue, *this, window); -} - -//////////////////////////////////////////////////////////////////////////////// -CLTopKV2Store::CLTopKV2Store() - : _values(nullptr), _indices(nullptr), _out_key_buf(nullptr), _out_ind_buf(nullptr) -{ -} - -void CLTopKV2Store::configure(ICLTensor *values, ICLTensor *indices, int k, int n) -{ - ARM_COMPUTE_ERROR_ON(values == nullptr && indices == nullptr); - ARM_COMPUTE_ERROR_ON(k == 0); - ARM_COMPUTE_ERROR_ON(k > n); - - _values = values; - _indices = indices; - - // Set kernel build options - std::set<std::string> build_opts; - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("topkv2_store", build_opts)); - - unsigned int idx = 2 * num_arguments_per_1D_tensor() + 2; - _kernel.setArg<cl_int>(idx++, n); - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, k, 1)); - ICLKernel::configure_internal(win); -} - -void CLTopKV2Store::setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf) -{ - _out_key_buf = out_key_buf; - _out_ind_buf = out_ind_buf; -} - -void CLTopKV2Store::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int idx = 0; - add_1D_tensor_argument(idx, _values, window); 
- add_1D_tensor_argument(idx, _indices, window); - _kernel.setArg(idx++, *_out_key_buf); - _kernel.setArg(idx++, *_out_ind_buf); - - enqueue(queue, *this, window); -} - -} // namespace arm_compute diff --git a/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp deleted file mode 100644 index 3b5782c25..000000000 --- a/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/NEMath.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squared, - const ITensorInfo *output, const NormalizationLayerInfo &norm_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_squared, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared); - - // Checks performed when output is configured - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, - ITensorInfo *input_squared, - ITensorInfo *output, - const NormalizationLayerInfo &norm_info) -{ - unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); - const unsigned int num_elems_read_per_iteration = - num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2); - const unsigned int num_rows = - (norm_info.type() == NormType::IN_MAP_2D) ? norm_info.norm_size() : 1; - const unsigned int border_width = - (norm_info.is_cross_map()) ? 
0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U); - BorderSize border_size = BorderSize(0, border_width); - bool window_changed = false; - - // Configure window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - - AccessWindowRectangle input_access(input, -border_size.left, 0, num_elems_read_per_iteration, - num_rows); - AccessWindowRectangle input_squared_access(input_squared, -border_size.left, 0, - num_elems_read_per_iteration, num_rows); - - if (output->total_size() != 0) - { - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - window_changed = - update_window_and_padding(win, input_access, input_squared_access, output_access); - output_access.set_valid_region(win, input->valid_region()); - } - else - { - window_changed = update_window_and_padding(win, input_access, input_squared_access); - } - - Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -NENormalizationLayerExKernel::NENormalizationLayerExKernel() - : _func(nullptr), _input(nullptr), _input_squared(nullptr), _output(nullptr), - _norm_info(NormType::IN_MAP_1D), _border_size() -{ -} - -BorderSize NENormalizationLayerExKernel::border_size() const { return _border_size; } - -void NENormalizationLayerExKernel::configure(const ITensor *input, const ITensor *input_squared, - ITensor *output, NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_squared, output); - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), *input->info()); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), input_squared->info(), output->info(), norm_info)); - - const unsigned int border_width = - (norm_info.is_cross_map()) ? 
0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U); - - _input = input; - _input_squared = input_squared; - _output = output; - _norm_info = norm_info; - _border_size = BorderSize(0, border_width); - - switch (_input->info()->data_type()) - { - case DataType::F32: - { - switch (norm_info.type()) - { - case NormType::IN_MAP_1D: - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 0, false>; - break; - case NormType::IN_MAP_2D: - // Normalize over X and Y - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 0, true>; - break; - case NormType::CROSS_MAP: - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 2, false>; - break; - default: - break; - } - break; - } - case DataType::F16: - { - switch (norm_info.type()) - { - case NormType::IN_MAP_1D: - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 0, false>; - break; - case NormType::IN_MAP_2D: - // Normalize over X and Y - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 0, true>; - break; - case NormType::CROSS_MAP: - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 2, false>; - break; - default: - break; - } - break; - } - default: - ARM_COMPUTE_ERROR("NOT SUPPORTED!"); - } - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), input_squared->info(), - output->info(), norm_info); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - INEKernel::configure(win_config.second); -} - -template <DataType dt, unsigned int dim, bool do_2D_norm> -void NENormalizationLayerExKernel::normalize_float(const Window &window) -{ - Iterator input(_input, window); - Iterator input_squared(_input_squared, window); - Iterator output(_output, window); - - const int dim_y = 1; - const int radius = _norm_info.norm_size(); - const int total_size = _input->info()->dimension(dim) - 1; - const int input_squared_stride = _input_squared->info()->strides_in_bytes()[dim]; - // We account 
padding across X only and we iterate over rows - const int min_left = (dim == 2) ? 0 : -static_cast<int>(border_size().left); - const int max_right = (dim == 2) ? total_size : total_size + border_size().left; - const int min_top = 0; - const int max_bottom = _input->info()->dimension(dim_y) - 1; - - if (dt == DataType::F32) - { - const float32x4_t coeff_vec = vdupq_n_f32(_norm_info.scale_coeff()); - const float32x4_t beta_vec = vdupq_n_f32(_norm_info.beta()); - const float32x4_t kappa_vec = vdupq_n_f32(_norm_info.kappa()); - - execute_window_loop( - window, - [&](const Coordinates &id) { - // Get range to normalize - const int current_row = do_2D_norm ? id[dim_y] : 0; - const int current_slice = id[dim]; - const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0; - const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0; - const int first_slice = std::max(current_slice - radius, min_left); - const int last_slice = std::min(current_slice + radius, max_right); - - // Accumulate 2D In-Map values - float32x4_t accu = vdupq_n_f32(0.f); - for (int j = first_row; j <= last_row; j++) - { - // Compute row displacement - const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y]; - const uint8_t *const input_squared_ptr = - input_squared.ptr() + row - (current_slice * input_squared_stride); - for (int i = first_slice; i <= last_slice; ++i) - { - accu = vaddq_f32(accu, vld1q_f32(reinterpret_cast<const float *>( - input_squared_ptr + i * input_squared_stride))); - } - } - - // Normalize - const float32x4_t normalized = vpowq_f32(vmlaq_f32(kappa_vec, coeff_vec, accu), beta_vec); - const float32x4_t normalized_pixel = vmulq_f32( - vld1q_f32(reinterpret_cast<const float *>(input.ptr())), vinvq_f32(normalized)); - vst1q_f32(reinterpret_cast<float *>(output.ptr()), normalized_pixel); - }, - input, input_squared, output); - } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - else if (dt == DataType::F16) - { - 
const float16x8_t coeff_vec = vdupq_n_f16(_norm_info.scale_coeff()); - const float16x8_t beta_vec_f16 = vdupq_n_f16(_norm_info.beta()); - const float16x8_t kappa_vec = vdupq_n_f16(_norm_info.kappa()); - - execute_window_loop( - window, - [&](const Coordinates &id) { - // Get range to normalize - const int current_row = do_2D_norm ? id[dim_y] : 0; - const int current_slice = id[dim]; - const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0; - const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0; - const int first_slice = std::max(current_slice - radius, min_left); - const int last_slice = std::min(current_slice + radius, max_right); - - // Accumulate 2D In-Map values - float16x8_t accu = vdupq_n_f16(0.f); - for (int j = first_row; j <= last_row; j++) - { - // Compute row displacement - const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y]; - const uint8_t *const input_squared_ptr = - input_squared.ptr() + row - (current_slice * input_squared_stride); - for (int i = first_slice; i <= last_slice; ++i) - { - accu = vaddq_f16(accu, vld1q_f16(reinterpret_cast<const float16_t *>( - input_squared_ptr + i * input_squared_stride))); - } - } - - const float16x8_t norm_f16 = - vpowq_f16(vaddq_f16(kappa_vec, vmulq_f16(coeff_vec, accu)), beta_vec_f16); - const float16x8_t normalized_pixel = vmulq_f16( - vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr())), vinvq_f16(norm_f16)); - vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), normalized_pixel); - }, - input, input_squared, output); - } -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - else - { - ARM_COMPUTE_ERROR("Not supported"); - } -} - -Status NENormalizationLayerExKernel::validate(const ITensorInfo *input, - const ITensorInfo *input_squared, - const ITensorInfo *output, - const NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, input_squared, output, norm_info)); - 
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), - input_squared->clone().get(), - output->clone().get(), norm_info) - .first); - - return Status{}; -} - -void NENormalizationLayerExKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - // Run function - (this->*_func)(window); -} diff --git a/libs/ARMComputeEx/src/core/UtilsEx.cpp b/libs/ARMComputeEx/src/core/UtilsEx.cpp deleted file mode 100644 index b63093bbb..000000000 --- a/libs/ARMComputeEx/src/core/UtilsEx.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/UtilsEx.h" - -#include <cstdint> -#include <fstream> -#include <map> -#include <string> - -using namespace arm_compute; - -const std::string & -arm_compute::string_from_activation_func_ex(ActivationLayerInfoEx::ActivationFunction act) -{ - static std::map<ActivationLayerInfoEx::ActivationFunction, const std::string> act_map = { - {ActivationLayerInfoEx::ActivationFunction::RSQRT, "RSQRT"}, - }; - - return act_map[act]; -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp deleted file mode 100644 index 1e52fc429..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLActivationLayerEx.h" - -#include "arm_compute/core/CL/kernels/CLActivationLayerExKernel.h" - -using namespace arm_compute; - -void CLActivationLayerEx::configure(ICLTensor *input, ICLTensor *output, - ActivationLayerInfoEx act_info) -{ - auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerExKernel>(); - k->configure(input, output, act_info); - _kernel = std::move(k); -} - -Status CLActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfoEx &act_info) -{ - return CLActivationLayerExKernel::validate(input, output, act_info); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp deleted file mode 100644 index dff743e89..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLArgMinMax.h" - -#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -namespace arm_compute -{ - -CLArgMinMax::CLArgMinMax() - : _input(nullptr), _output(nullptr), _argminmax_axis(), _interm_tensors(), _argminmax_kernels(), - _num_of_kernels() -{ -} - -void CLArgMinMax::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, - ArgOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op)); - _input = input; - _output = output; - _argminmax_axis = axis; - _arg_op = op; - // NOTE The argminmax_axis must have no duplication. - _num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = _num_of_kernels - 1; - - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _argminmax_kernels = - arm_compute::support::cpp14::make_unique<CLArgMinMaxKernel[]>(_num_of_kernels); - - TensorShape shape{input->info()->tensor_shape()}; - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(_argminmax_axis[i], 1); - _interm_tensors[i].allocator()->init( - TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())); - _interm_tensors[i].allocator()->allocate(); - } - - // Set a vector that is ordered ICLTensors sequentially. 
- std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ArgMinMax on all kernels - for (size_t i = 0; i < _num_of_kernels; i++) - { - _argminmax_kernels[i].configure(tensors[i], tensors[i + 1], _argminmax_axis[i], op); - } -} - -Status CLArgMinMax::validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis, - const ITensorInfo *output, ArgOperation op) -{ - const size_t num_of_kernels = argminmax_axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(argminmax_axis[i], 1); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - } - - // Set a vector that is ordered ITensorInfo sequentially. 
- std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate argminmax only on all kernels - for (size_t i = 0; i < num_of_kernels; i++) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLArgMinMaxKernel::validate(tensors[i], tensors[i + 1], argminmax_axis[i], op)); - } - - return Status{}; -} - -void CLArgMinMax::run() -{ - for (size_t i = 0; i < _num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_argminmax_kernels[i]); - } -} - -} // namespace arm_compute diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp deleted file mode 100644 index 3f403c80a..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h" - -using namespace arm_compute; - -void CLArithmeticSubtractionEx::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - ConvertPolicy policy) -{ - auto k = arm_compute::support::cpp14::make_unique<CLArithmeticSubtractionExKernel>(); - k->configure(input1, input2, output, policy); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} - -Status CLArithmeticSubtractionEx::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, ConvertPolicy policy) -{ - return CLArithmeticSubtractionExKernel::validate(input1, input2, output, policy); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp deleted file mode 100644 index 26e3798cc..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h" - -#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h" - -using namespace arm_compute; - -void CLBatchToSpaceND::configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLBatchToSpaceNDKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp deleted file mode 100644 index 7c5fe5eda..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h" - -#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - BinaryLogicalOperation op) -{ - auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); - k->configure(input1, input2, output, op); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp deleted file mode 100644 index 8e106737c..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLCast.h" - -#include "arm_compute/core/CL/kernels/CLCastKernel.h" - -using namespace arm_compute; - -void CLCast::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp deleted file mode 100644 index f6a745a25..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLComparisonOp.h" - -#include "arm_compute/core/CL/kernels/CLComparisonOpKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLComparisonOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - const ComparisonOperation &op) -{ - auto k = arm_compute::support::cpp14::make_unique<CLComparisonOpKernel>(); - k->configure(input1, input2, output, op); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? 
input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp deleted file mode 100644 index c2e4ca9ff..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h" - -#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" - -using namespace arm_compute; - -void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp deleted file mode 100644 index 2781784ca..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h" - -#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" - -using namespace arm_compute; - -void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output, - const ICLTensor *lookups) -{ - auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>(); - k->configure(input, output, lookups); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp deleted file mode 100644 index 411fa8700..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLExp.h" - -#include "arm_compute/core/CL/kernels/CLExpKernel.h" - -using namespace arm_compute; - -void CLExp::configure(const ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLExpKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp deleted file mode 100644 index fb056fe45..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLGather.h" - -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" - -using namespace arm_compute; - -void CLGather::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLGatherKernel>(); - k->configure(input1, input2, output); - _kernel = std::move(k); -} - -Status CLGather::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output) -{ - return CLGatherKernel::validate(input1, input2, output); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp deleted file mode 100644 index 7180e9356..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h" - -#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h" - -using namespace arm_compute; - -void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys, - const ICLTensor *input, ICLTensor *output, ICLTensor *hits) -{ - auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>(); - k->configure(lookups, keys, input, output, hits); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp deleted file mode 100644 index be35ea732..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLNeg.h" - -#include "arm_compute/core/CL/kernels/CLNegKernel.h" - -using namespace arm_compute; - -void CLNeg::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp deleted file mode 100644 index 276c4557a..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" - -using namespace arm_compute; - -CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {} - -void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output, - const NormalizationLayerInfo &norm_info) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr); - - // Configure normalization kernel - _norm_kernel.configure(input, output, norm_info); - - // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel - _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); -} - -Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const NormalizationLayerInfo &norm_info) -{ - return CLNormalizationLayerExKernel::validate(input, output, norm_info); -} - -void CLNormalizationLayerEx::run() -{ - // Run border handler - CLScheduler::get().enqueue(_border_handler, false); - - // Run normalization kernel - CLScheduler::get().enqueue(_norm_kernel); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp deleted file mode 100644 index 38adedd10..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLPReLU.h" - -#include "arm_compute/core/CL/kernels/CLPReLUKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>(); - k->configure(input, alpha, output); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp deleted file mode 100644 index 5265b6c34..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved -* Copyright (c) 2016-2018 ARM Limited. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ -#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h" - -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" - -using namespace arm_compute; - -void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPadLayerKernel>(); - k->configure(input, output, pad_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp deleted file mode 100644 index fb363270d..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLPermuteEx.h" - -#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h" - -using namespace arm_compute; - -void CLPermuteEx::configure(const ICLTensor *input, ICLTensor *output, - const PermutationVector &perm) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPermuteExKernel>(); - k->configure(input, output, perm); - _kernel = std::move(k); -} - -Status CLPermuteEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteExKernel::validate(input, output, perm)); - return Status{}; -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp deleted file mode 100644 index dc0baa8dd..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLPixelWiseDivision.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h" - -using namespace arm_compute; - -void CLPixelWiseDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - float scale, ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPixelWiseDivisionKernel>(); - k->configure(input1, input2, output, scale, overflow_policy, rounding_policy); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} - -Status CLPixelWiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) -{ - return CLPixelWiseDivisionKernel::validate(input1, input2, output, scale, overflow_policy, - rounding_policy); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp deleted file mode 100644 index 2b8d82706..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLReduceOperation.h" - -#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -using namespace arm_compute; - -CLReduceOperation::CLReduceOperation() - : _input(nullptr), _output(nullptr), _axis(), _interm_tensors(), _reduce_kernels() -{ -} - -Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output, - const std::set<uint32_t> &axis, const ReduceOperation &op) -{ - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - auto it = axis.begin(); - for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) - { - shape.set(*it, 1); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - } - - // Set a vector that is ordered ITensorInfo sequentially. 
- std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; ++i) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate ReduceOperation only on all kernels - it = axis.begin(); - for (size_t i = 0; i < num_of_kernels; ++i, ++it) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op)); - } - - return Status{}; -} - -void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output, - const std::set<uint32_t> &axis, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op)); - - _axis = axis; - - _input = input; - _output = output; - - // NOTE The axis must have no duplication. - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _reduce_kernels = - arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); - - TensorShape shape{input->info()->tensor_shape()}; - auto it = axis.begin(); - for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) - { - shape.set(*it, 1); - _interm_tensors[i].allocator()->init( - TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())); - _interm_tensors[i].allocator()->allocate(); - } - - // Set a vector that is ordered ICLTensors sequentially. 
- std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; ++i) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ReduceOperation on all kernels - it = axis.begin(); - for (size_t i = 0; i < num_of_kernels; ++i, ++it) - { - _reduce_kernels[i].configure(tensors[i], tensors[i + 1], *it, op); - } -} - -void CLReduceOperation::run() -{ - const size_t num_of_kernels = _axis.size(); - for (size_t i = 0; i < num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_reduce_kernels[i]); - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp deleted file mode 100644 index c03826891..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h" - -#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h" - -using namespace arm_compute; - -void CLSpaceToBatchND::configure(const ICLTensor *input, const ICLTensor *block_size, - const ICLTensor *padding_size, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSpaceToBatchNDKernel>(); - k->configure(input, block_size, padding_size, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp deleted file mode 100644 index 0f455f96f..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h" - -#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" - -using namespace arm_compute; - -void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp deleted file mode 100644 index dc6e4af44..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLSquaredDifference.h" - -#include "arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLSquaredDifference::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSquaredDifferenceKernel>(); - k->configure(input1, input2, output); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? 
input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp deleted file mode 100644 index be7353493..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLStridedSliceEx.h" - -#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h" - -using namespace arm_compute; - -void CLStridedSliceEx::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, - int32_t endMask, int32_t shrinkAxisMask) -{ - auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceExKernel>(); - k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp deleted file mode 100644 index 19177497c..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLTopKV2.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "arm_compute/core/CL/ICLTensor.h" - -#include "../../topk_v2.h" - -namespace arm_compute -{ - -CLTopKV2::CLTopKV2() - : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0), - _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(), - _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(), - _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), - _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr), _qs_kernel(), - _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(), - _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(), - _reorder_negatives_kernel(), _store_kernel() -{ -} - -void CLTopKV2::configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices, - int total_bits, int bits) -{ - _total_bits = total_bits; - _bits = bits; - _n = input->info()->tensor_shape()[0]; - - // _total_bits should be divided by _bits. 
- ARM_COMPUTE_ERROR_ON((_total_bits % _bits) != 0); - - _k = k; - _radix = 1 << bits; - - _input = input; - _values = values; - _indices = indices; - - std::string topk_env; - - char *env = getenv("ACL_TOPKV2"); - if (env) - topk_env = env; - - if (topk_env == "GPU_SINGLE") - { - _qs_idx_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - _qs_temp_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - - _qs_kernel.configure(input, values, indices, &_qs_idx_buf, &_qs_temp_buf, k, _n); - } - else if (topk_env == "GPU") - { - // n should be divided by (_GROUPS * _ITEMS) - ARM_COMPUTE_ERROR_ON((_n % (_GROUPS * _ITEMS)) != 0); - - _hist_buf_size = _radix * _GROUPS * _ITEMS; - _glob_sum_buf_size = _HISTOSPLIT; - - _hist_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _hist_buf_size); - _glob_sum_buf = - cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _glob_sum_buf_size); - _temp_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _glob_sum_buf_size); - _first_negative_idx_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int)); - _in_key_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n); - _out_key_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n); - _in_ind_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - _out_ind_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - - _p_in_key_buf = &_in_key_buf; - _p_out_key_buf = &_out_key_buf; - _p_in_ind_buf = &_in_ind_buf; - 
_p_out_ind_buf = &_out_ind_buf; - - _init_kernel.configure(input, _p_in_key_buf, _p_in_ind_buf, _n); - _hist_kernel.configure(&_hist_buf, bits, _n); - _scan_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits); - _glob_scan_hist_kernel.configure(&_glob_sum_buf, &_temp_buf, bits); - _paste_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits); - _reorder_kernel.configure(&_hist_buf, bits, _n); - _find_first_negative_kernel.configure(&_first_negative_idx_buf, _n); - _reorder_negatives_kernel.configure(&_first_negative_idx_buf, _n); - _store_kernel.configure(values, indices, k, _n); - } - else - { - // DO NOTHING for CPU. - } -} - -void CLTopKV2::run() -{ - std::string topk_env; - - char *env = getenv("ACL_TOPKV2"); - if (env) - topk_env = env; - - if (topk_env == "GPU_SINGLE") - { - run_on_gpu_single_quicksort(); - } - else if (topk_env == "GPU") - { - run_on_gpu(); - } - else - { - run_on_cpu(); - } -} - -void CLTopKV2::run_on_gpu_single_quicksort() -{ - // This is a single threaded quick sort implementation. - CLScheduler::get().enqueue(_qs_kernel, false); - - arm_compute::CLScheduler::get().sync(); -} - -void CLTopKV2::run_on_gpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - // 1. CLTopKV2Init set key buffer and index buffer. - // - Key buffer is set as the same value of the layer's input - // - Values in the index buffer are set as their indices. - CLScheduler::get().enqueue(_init_kernel, false); - - int n_passes = _total_bits / _bits; - - // 2. Repeat (total_bits/bits) times. - // - total_bits is the number of bits of the data type (e.g., 32 for float) - // - bits defines number of buckets (e.g. 16 buckets where bit is 4) - for (int pass = 0; pass < n_passes; ++pass) - { - arm_compute::CLScheduler::get().sync(); - - // 2.1. Calculate histogram with _GROUPS * _ITEMS threads - _hist_kernel.setPass(pass, _p_in_key_buf); - CLScheduler::get().enqueue(_hist_kernel, false); - - // 2.2. 
Calculate prefix sum locally with multiple threads - CLScheduler::get().enqueue(_scan_hist_kernel, false); - // 2.3. Calculate prefix sum within a work group - CLScheduler::get().enqueue(_glob_scan_hist_kernel, false); - // 2.4. Calculate global prefix sum - CLScheduler::get().enqueue(_paste_hist_kernel, false); - - // 2.5. Reorder keys and indices based on the global prefix sum - _reorder_kernel.setPass(pass, _p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, _p_out_ind_buf); - CLScheduler::get().enqueue(_reorder_kernel, false); - - cl::Buffer *tmp; - // swap key buffers - tmp = _p_in_key_buf; - _p_in_key_buf = _p_out_key_buf; - _p_out_key_buf = tmp; - - // swap index buffers - tmp = _p_in_ind_buf; - _p_in_ind_buf = _p_out_ind_buf; - _p_out_ind_buf = tmp; - } - - // 3. Get the first negative index - // Because we swap in_buf and out_buf at the end of the above for loop, - // the output buffers are in bufs. - _find_first_negative_kernel.setOutputBuffer(_p_in_key_buf); - CLScheduler::get().enqueue(_find_first_negative_kernel, false); - - // 4. Correct odering of negatives - // - Since radix sort does not consider negatives, negatives are considered as bigger values - // than positives. - // reordered data will be stored in _p_out_key_buf and _p_out_ind_buf - _reorder_negatives_kernel.setBuffers(_p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, - _p_out_ind_buf); - CLScheduler::get().enqueue(_reorder_negatives_kernel, false); - - // 5. Extract top k values from sorted keys and indices. - _store_kernel.setOutputBuffers(_p_out_key_buf, _p_out_ind_buf); - CLScheduler::get().enqueue(_store_kernel, false); - - arm_compute::CLScheduler::get().sync(); - -#if 0 - // below code is left for debugging. 
- int first_neg; - q.enqueueReadBuffer(_first_negative_idx_buf, CL_TRUE, 0, sizeof(cl_int), &first_neg); - std::cout << "first neg = " << first_neg << std::endl; - - float in_key[_n]; - q.enqueueReadBuffer(*_p_in_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, in_key); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "in_key[" << i << "] = " << in_key[i] << std::endl; - } - - float out_key[_n]; - q.enqueueReadBuffer(*_p_out_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, out_key); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "out_key[" << i << "] = " << out_key[i] << std::endl; - } - - int in_ind[_n]; - q.enqueueReadBuffer(*_p_in_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, in_ind); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "in_ind[" << i << "] = " << in_ind[i] << std::endl; - } - - int out_ind[_n]; - q.enqueueReadBuffer(*_p_out_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, out_ind); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "out_ind[" << i << "] = " << out_ind[i] << std::endl; - } - - int hist_buf[_hist_buf_size]; - q.enqueueReadBuffer(_hist_buf, CL_TRUE, 0, sizeof(cl_int)*_hist_buf_size, hist_buf); - for(uint32_t i = 0 ; i < _hist_buf_size; ++i) { - std::cout << "hist_buf[" << i << "] = " << hist_buf[i] << std::endl; - } - - int glob_sum_buf[_glob_sum_buf_size]; - q.enqueueReadBuffer(_glob_sum_buf, CL_TRUE, 0, sizeof(cl_int)*_glob_sum_buf_size, glob_sum_buf); - for(uint32_t i = 0 ; i < _glob_sum_buf_size; ++i) { - std::cout << "glob_sum_buf[" << i << "] = " << glob_sum_buf[i] << std::endl; - } - -#endif -} - -void CLTopKV2::run_on_cpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - // const Window& w = _topkv2_kernel.window(); - - _input->map(q); - _values->map(q); - _indices->map(q); - - // int row_size = (w[0].end() - w[0].start()) / w[0].step(); - int row_size = _input->info()->tensor_shape()[0]; - int rank = _input->info()->num_dimensions(); - - if (rank > 2) - throw std::runtime_error("Not supported type."); - - int row_num = (rank == 2 
? _input->info()->tensor_shape()[1] : 1); - - if (_input->info()->data_type() == DataType::F32) - { - nnfw::rt::optimized_ops::TopK<float>(row_size, row_num, (float *)_input->buffer(), _k, - (int32 *)_indices->buffer(), (float *)_values->buffer()); - } - else if (_input->info()->data_type() == DataType::S32) - { - nnfw::rt::optimized_ops::TopK<int32_t>(row_size, row_num, (int32_t *)_input->buffer(), _k, - (int32 *)_indices->buffer(), - (int32_t *)_values->buffer()); - } - else if (_input->info()->data_type() == DataType::QASYMM8) - { - nnfw::rt::optimized_ops::TopK<uint8_t>(row_size, row_num, (uint8_t *)_input->buffer(), _k, - (int32 *)_indices->buffer(), - (uint8_t *)_values->buffer()); - } - else - { - throw std::runtime_error("Not supported type."); - } - - _input->unmap(q); - _values->unmap(q); - _indices->unmap(q); -} -} // namespace arm_compute diff --git a/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp deleted file mode 100644 index 988e92715..000000000 --- a/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute; - -NENormalizationLayerEx::NENormalizationLayerEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _norm_kernel(), _multiply_kernel(), - _border_handler(), _input_squared() -{ -} - -void NENormalizationLayerEx::configure(const ITensor *input, ITensor *output, - const NormalizationLayerInfo &norm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type(), - input->info()->quantization_info()); - _input_squared.allocator()->init(tensor_info); - - // Manage intermediate buffers - _memory_group.manage(&_input_squared); - - // Configure kernels - _norm_kernel.configure(input, &_input_squared, output, norm_info); - _multiply_kernel.configure(input, input, &_input_squared, 1.0f, ConvertPolicy::SATURATE, - RoundingPolicy::TO_ZERO); - _border_handler.configure(&_input_squared, _norm_kernel.border_size(), BorderMode::CONSTANT, - PixelValue(0.0f)); - - // Allocate the tensor once the configure methods have been called - _input_squared.allocator()->allocate(); -} - -Status NENormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const NormalizationLayerInfo &norm_info) -{ - // Perform validation step - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - - ARM_COMPUTE_RETURN_ON_ERROR( - NENormalizationLayerExKernel::validate(input, input, output, norm_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate( - input, input, output, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); - - return Status{}; -} - -void NENormalizationLayerEx::run() -{ - _memory_group.acquire(); - - NEScheduler::get().schedule(&_multiply_kernel, Window::DimY); - NEScheduler::get().schedule(&_border_handler, Window::DimY); - 
NEScheduler::get().schedule(&_norm_kernel, Window::DimY); - - _memory_group.release(); -} diff --git a/libs/ARMComputeEx/src/runtime/topk_v2.h b/libs/ARMComputeEx/src/runtime/topk_v2.h deleted file mode 100644 index f94effea1..000000000 --- a/libs/ARMComputeEx/src/runtime/topk_v2.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2018 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file topk_v2.h - * @brief This file contains TopK method and TopContainer class for TopK operation - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__ -#define __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__ - -typedef int32_t int32; - -namespace nnfw -{ -namespace rt -{ -namespace optimized_ops -{ -/** - * @brief class to define TopK operation - * @note The follwing codes are impemented and modified while referring to TFLite topk_v2.cc file. - * TopK_v2 of NN Runtime supports TENSOR_FLOAT32, TENSOR_QUANT8_ASYMM, TENSOR_INT32 other than - * TFLite. - * (TFLite additionaly supports kTfLiteInt64.) - * - * The class that collects top indexes of k values. Based on template - * tensorflow::gtl::TopN<> but, for optimization, - * it re-uses the same container. 
- */ -template <typename T> class TopContainer -{ -public: - /** - * @brief Prevent default constructor of of this class - */ - TopContainer() = delete; - /** - * @brief Constructor with params - * @param [in] row_size Size of row in data - * @param [in] k The top k predictions - */ - TopContainer(int32 k, int32 row_size) : k_(k), container_(), values_(nullptr) - { - container_.reserve(std::min(k, row_size) + 1); - } - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * @param [in] topContainer To copy - */ - TopContainer(const TopContainer &) = delete; - /* - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * @param [in] topContainer To copy - * @return Reference of TopContainer - */ - TopContainer &operator=(const TopContainer &) = delete; - - /** - * @brief Start collecting - * @param [in] values To set as values - * @return N/A - */ - void start_collecting(const T *values) - { - values_ = values; - container_.clear(); - } - - /** - * @brief Push a value to be compared for topk - * @param [in] a A value to compare - * @return N/A - */ - void push(int32 a) - { - auto comparator = [this](int32 a, int32 b) { return compare_fun(a, b); }; - if (container_.size() <= (size_t)k_) - { - container_.push_back(a); - if (container_.size() == (size_t)(k_ + 1)) - { - std::make_heap(container_.begin(), container_.end(), comparator); - std::pop_heap(container_.begin(), container_.end(), comparator); - } - } - else if (comparator(a, container_.front())) - { - container_.back() = a; - std::push_heap(container_.begin(), container_.end(), comparator); - std::pop_heap(container_.begin(), container_.end(), comparator); - } - } - - /** - * @brief Get sorted result from pushed values - * @return Reference of vector with sorted values - */ - const std::vector<int32> &sorted_result() - { - auto comparator = [this](int32 a, int32 b) { return compare_fun(a, b); }; - if (container_.size() <= 
(size_t)(k_)) - { - std::sort(container_.begin(), container_.end(), comparator); - } - else - { - std::sort_heap(container_.begin(), container_.end() - 1, comparator); - container_.resize(k_); - } - return container_; - } - -private: - int32 k_; - std::vector<int32> container_; - const T *values_ = nullptr; - - bool compare_fun(int32 a, int32 b) const - { - if (values_[b] < values_[a]) - { - return true; - } - else if (values_[b] > values_[a]) - { - return false; - } - else - { - return a < b; - } - } -}; - -/** - * @brief Operates TopK operation with params - * @param [in] row_size Size of row in data - * @param [in] num_rows The number of rows in data - * @param [in] data To be operated in - * @param [in] k The top k predictions - * @param [out] output_indexes Indexes of targets in the top k predictions - * @param [out] output_values Values of targets in the top k predictions - * @return N/A - */ -template <typename T> -void TopK(int32 row_size, int32 num_rows, const T *data, int32 k, int32 *output_indexes, - T *output_values) -{ - TopContainer<T> topc(k, row_size); - for (int row = 0; row < num_rows; ++row) - { - const T *values_row = data + row * row_size; - topc.start_collecting(values_row); - for (int32 c = 0; c < row_size; ++c) - { - topc.push(c); - } - - // Prepare output buffers. - int32 *indexes_row = output_indexes + row * k; - T *output_row = output_values + row * k; - // We always assume that the output is sorted. - const auto &top_k = topc.sorted_result(); - std::copy(top_k.begin(), top_k.end(), indexes_row); - std::transform(top_k.begin(), top_k.end(), output_row, - [values_row](const int32 loc) { return values_row[loc]; }); - } -} - -} // namespace optimized_ops -} // namespace rt -} // namespace nnfw - -#endif // __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__ diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt deleted file mode 100644 index 99d2028f4..000000000 --- a/libs/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -# Add all subdirectories. 
-# Each library in sub-directory must have it's own CMakeLists.txt -# to build library's binaries or to support interface. -add_subdirectories() diff --git a/libs/cpp14/CMakeLists.txt b/libs/cpp14/CMakeLists.txt deleted file mode 100644 index bba9e132d..000000000 --- a/libs/cpp14/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_library(nnfw_lib_cpp14 INTERFACE) -target_include_directories(nnfw_lib_cpp14 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include) diff --git a/libs/cpp14/include/cpp14/memory.h b/libs/cpp14/include/cpp14/memory.h deleted file mode 100644 index b3e678baa..000000000 --- a/libs/cpp14/include/cpp14/memory.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * @file memory.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains @c make_unique which is not supported by C++11 - */ -#ifndef __NNFW_CPP14_MEMORY_H__ -#define __NNFW_CPP14_MEMORY_H__ - -#include <memory> - -namespace nnfw -{ -namespace cpp14 -{ -/** - * @brief Provide @c make_unique function supported from C++14 - * @param[in] args List of arguments with which an instance of T will be constructed. - * @return @c std::unique_ptr of an instance of type T - */ -template <typename T, typename... Args> std::unique_ptr<T> make_unique(Args &&... 
args) -{ - // NOTE std::make_unique is missing in C++11 standard - return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); -} - -} // napesapce cpp14 -} // namespace nnfw - -#endif // __NNFW_CPP14_MEMORY_H__ diff --git a/libs/misc/CMakeLists.txt b/libs/misc/CMakeLists.txt deleted file mode 100644 index cd01695fb..000000000 --- a/libs/misc/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# Library `nnfw_lib_misc` -set(NNFW_UTILITY_SRCS src/environment.cpp) -list(APPEND NNFW_UTILITY_SRCS src/tensor/Shape.cpp) -list(APPEND NNFW_UTILITY_SRCS src/tensor/NonIncreasingStride.cpp) -list(APPEND NNFW_UTILITY_SRCS src/tensor/IndexFormatter.cpp) -list(APPEND NNFW_UTILITY_SRCS src/tensor/Comparator.cpp) - -add_library(nnfw_lib_misc STATIC ${NNFW_UTILITY_SRCS}) -target_include_directories(nnfw_lib_misc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) -set_target_properties(nnfw_lib_misc PROPERTIES POSITION_INDEPENDENT_CODE ON) - -add_executable(nnfw_tensor_index_iterator "examples/tensor_index_iterator.cpp") -target_link_libraries(nnfw_tensor_index_iterator nnfw_lib_misc) diff --git a/libs/misc/examples/tensor_index_iterator.cpp b/libs/misc/examples/tensor_index_iterator.cpp deleted file mode 100644 index 8a19dac87..000000000 --- a/libs/misc/examples/tensor_index_iterator.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "misc/tensor/IndexIterator.h" - -#include <array> - -#include <iostream> -#include <algorithm> - -#include <cassert> - -void test_iterate(void) -{ - const nnfw::misc::tensor::Shape shape{3, 4, 7}; - - std::array<int, 3 * 4 * 7> array; - - array.fill(0); - - using nnfw::misc::tensor::iterate; - using nnfw::misc::tensor::Index; - - iterate(shape) << [&](const Index &index) { - assert(index.rank() == shape.rank()); - - const size_t rank = index.rank(); - - uint32_t offset = index.at(0); - - for (size_t axis = 1; axis < rank; ++axis) - { - offset *= shape.dim(axis); - offset += index.at(axis); - } - - array[offset] += 1; - }; - - assert(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; })); -} - -int main(int argc, char **argv) -{ - test_iterate(); - - nnfw::misc::tensor::Shape shape{3, 4, 3, 4}; - - std::cout << "Iterate over tensor{3, 4, 3, 4}" << std::endl; - - nnfw::misc::tensor::iterate(shape) << [](const nnfw::misc::tensor::Index &index) { - std::cout << "rank: " << index.rank() << std::endl; - - for (size_t d = 0; d < index.rank(); ++d) - { - std::cout << " offset(" << d << ") = " << index.at(d) << std::endl; - } - }; - - return 0; -} diff --git a/libs/misc/include/misc/EnvVar.h b/libs/misc/include/misc/EnvVar.h deleted file mode 100644 index 47206d4c0..000000000 --- a/libs/misc/include/misc/EnvVar.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file EnvVar.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::EnvVar class - */ - -#ifndef __NNFW_MISC_ENV_VAR__ -#define __NNFW_MISC_ENV_VAR__ - -#include <algorithm> -#include <array> -#include <cstdlib> -#include <string> - -namespace nnfw -{ -namespace misc -{ -/** - * @brief Class to access environment variable - */ -class EnvVar -{ -public: - /** - * @brief Construct a new EnvVar object - * @param[in] key environment variable - */ - EnvVar(const std::string &key) - { - const char *value = std::getenv(key.c_str()); - if (value == nullptr) - { - // An empty string is considered as an empty value - _value = ""; - } - else - { - _value = value; - } - } - - /** - * @brief Get environment variable of string type - * @param[in] def Default value of environment variable - * @return Defaut value passed as a parameter when there is no environment variable, - * otherwise the value of environment variable passed into constructor - */ - std::string asString(const std::string &def) const - { - if (_value.empty()) - return def; - return _value; - } - - /** - * @brief Get environment variable of boolean type - * @param[in] def Default value of environment variable - * @return Defaut value passed as a parameter when there is no environment variable, - * otherwise the value of environment variable passed into constructor - */ - bool asBool(bool def) const - { - if (_value.empty()) - return def; - static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"}; - auto false_found = std::find(false_list.begin(), false_list.end(), _value); - return (false_found == false_list.end()); - } - - /** - * @brief Get environment variable of int type - * @param[in] def Default value of environment variable - * @return Defaut value passed as a parameter when there is no environment variable, - * otherwise the value of 
environment variable passed into constructor - */ - int asInt(int def) const - { - if (_value.empty()) - return def; - return std::stoi(_value); - } - -private: - std::string _value; -}; - -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_ENV_VAR__ diff --git a/libs/misc/include/misc/benchmark.h b/libs/misc/include/misc/benchmark.h deleted file mode 100644 index fe5b97585..000000000 --- a/libs/misc/include/misc/benchmark.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file benchmark.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::benchmark::Accumulator class - */ -#ifndef __NNFW_MISC_BENCHMARK_H__ -#define __NNFW_MISC_BENCHMARK_H__ - -#include <chrono> - -namespace nnfw -{ -namespace misc -{ -// Benckmark support -namespace benchmark -{ - -/** - * @brief Class to accumulate time during benchmark - */ -template <typename T> class Accumulator -{ -public: - /** - * @brief Construct a new Accumulator object - * @param[in] ref Object to keep time duration - */ - Accumulator(T &ref) : _ref(ref) - { - // DO NOTHING - } - -public: - /** - * @brief Return the reference of @c ref passed to constructor - * @return Reference of @c ref - */ - T &operator()(void) { return _ref; } - -private: - T &_ref; -}; - -/** - * @brief Run passed function and returns accumulated time - * @tparam T Period used by @c std::chrono::duration_cast - * @tparam Callable Function type to benchmark - * @param[in] acc Accumulated time after running @cb - * @param[in] cb Function to run and benchmark - * @return Accumulated time - */ -template <typename T, typename Callable> -Accumulator<T> &operator<<(Accumulator<T> &&acc, Callable cb) -{ - auto begin = std::chrono::steady_clock::now(); - cb(); - auto end = std::chrono::steady_clock::now(); - - acc() += std::chrono::duration_cast<T>(end - begin); - - return acc; -} - -template <typename T> Accumulator<T> measure(T &out) { return Accumulator<T>(out); } - -} // namespace benchmark -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_BENCHMARK_H__ diff --git a/libs/misc/include/misc/environment.h b/libs/misc/include/misc/environment.h deleted file mode 100644 index 8e6bd00d5..000000000 --- a/libs/misc/include/misc/environment.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file environment.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains utility functions and classes to access environment variables - */ - -#ifndef __UTIL_ENVIRONMENT_H__ -#define __UTIL_ENVIRONMENT_H__ - -namespace nnfw -{ -namespace misc -{ - -/** - * @brief Get the environment variable of int type - * @param[in] name Name of the environment variable - * @param[in] defaultValue Default value when the value of environment variable does not exist - * @return The int value of the environment variable - */ -int get_env_int(const char *name, int defaultValue = 0); - -/** - * @brief Get the environment variable of bool type - * @param[in] name Name of the environment variable - * @param[in] defaultValue Default value when the value of environment variable does not exist - * @return @c 0 if the value of the environment variable is @c "0", @c 1 in case of other number - */ -bool get_env_bool(const char *name, bool defaultValue = false); -} -} - -#include <string> - -namespace nnfw -{ -namespace misc -{ -namespace env -{ -/** - * @brief Parent struct of @ref IntAccessor and @ref FloatAccessor - * @tparam T Type of the value of environment variable - */ -template <typename T> struct Accessor -{ - /** - * @brief Destroy the Accessor object - */ - virtual ~Accessor() = default; - /** - * @brief Read the value of environment variable - * @param[out] out The value of environment 
variable - * @return @c true if accessing environment variable is successful, - * @c false if there is exist no such environment variable - */ - virtual bool access(T &out) const = 0; -}; - -/** - * @brief Class to read int environment variable - */ -class IntAccessor : public Accessor<int> -{ -public: - /** - * @brief Construct a new IntAccessor object - * @param[in] tag Name of environment variable - */ - IntAccessor(const std::string &tag); - -public: - /** - * @brief Read the value of environment variable - * @param[out] out The value of environment variable - * @return @c true if accessing environment variable is successful, - * @c false if there is exist no such environment variable - */ - bool access(int &out) const override; - -private: - std::string _tag; -}; - -/** - * @brief Class to read float environment variable - */ -class FloatAccessor : public Accessor<float> -{ -public: - /** - * @brief Construct a new FloatAccessor object - * @param[in] tag Name of environment variable - */ - FloatAccessor(const std::string &tag); - -public: - /** - * @brief Read the value of environment variable - * @param[out] out The value of environment variable - * @return @c true if accessing environment variable is successful, - * @c false if there is exist no such environment variable - */ - bool access(float &out) const override; - -private: - std::string _tag; -}; - -} // namespace env -} // namespace misc -} // namespace nnfw - -#endif // __UTIL_ENVIRONMENT_H__ diff --git a/libs/misc/include/misc/feature/Index.h b/libs/misc/include/misc/feature/Index.h deleted file mode 100644 index a361d8dd2..000000000 --- a/libs/misc/include/misc/feature/Index.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Index.h - * @brief This file contains Index class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_FEATURE_INDEX_H__ -#define __NNFW_MISC_FEATURE_INDEX_H__ - -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace feature -{ - -/** - * @brief Class to have the index information for calculating the offset. - */ -class Index -{ -public: - /** - * @brief Construct Index object using default constrcutor - */ - Index() = default; - -public: - /** - * @brief Construct Index object with three indexes of dimensions - * @param[in] ch The depth index - * @param[in] row The heigth index - * @param[in] col The width index - */ - Index(int32_t ch, int32_t row, int32_t col) : _batch{1}, _ch{ch}, _row{row}, _col{col} - { - // DO NOTHING - } - /** - * @brief Construct Index object with four indexes of dimensions - * @param[in] batch The batch index - * @param[in] ch The depth index - * @param[in] row The height index - * @param[in] col The width index - */ - Index(int32_t batch, int32_t ch, int32_t row, int32_t col) - : _batch{batch}, _ch{ch}, _row{row}, _col{col} - { - // DO NOTHING - } - -public: - /** - * @brief Get the batch index - * @return The batch index - */ - int32_t batch(void) const { return _batch; } - /** - * @brief Get the depth index - * @return The depth index - */ - int32_t ch(void) const { return _ch; } - /** - * @brief Get the height index - * @return The height index - */ - int32_t row(void) const { return _row; } - /** - * @brief Get the width index - * @return The width index - */ - int32_t 
col(void) const { return _col; } - -public: - /** - * @brief Get the batch index as the lvalue reference - * @return The reference of the batch value - */ - int32_t &batch(void) { return _batch; } - /** - * @brief Get the depth index as the lvalue reference - * @return The reference of the depth value - */ - int32_t &ch(void) { return _ch; } - /** - * @brief Get the height index as the lvalue reference - * @return The reference of the height value - */ - int32_t &row(void) { return _row; } - /** - * @brief Get the width index as the lvalue reference - * @return The reference of the width value - */ - int32_t &col(void) { return _col; } - -private: - /** - * @brief The batch index - */ - int32_t _batch; - /** - * @brief The depth index - */ - int32_t _ch; - /** - * @brief The height index - */ - int32_t _row; - /** - * @brief The width index - */ - int32_t _col; -}; - -} // namespace feature -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FEATURE_INDEX_H__ diff --git a/libs/misc/include/misc/feature/IndexIterator.h b/libs/misc/include/misc/feature/IndexIterator.h deleted file mode 100644 index 1cf675526..000000000 --- a/libs/misc/include/misc/feature/IndexIterator.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file IndexIterator.h - * @brief This file contains IndexIterator class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_FEATURE_INDEX_ITERATOR_H__ -#define __NNFW_MISC_FEATURE_INDEX_ITERATOR_H__ - -#include "misc/feature/Shape.h" - -namespace nnfw -{ -namespace misc -{ -namespace feature -{ - -/** - * @brief Class to iterate Callable with Index of feature - */ -class IndexIterator -{ -public: - /** - * @brief Construct IndexIterator object with Shape of feature - * @param[in] shape Shape reference of feature - */ - IndexIterator(const Shape &shape) : _shape{shape} - { - // DO NOTHING - } - -public: - /** - * @brief Call a function iterated - * @param[in] cb A callback function - * @return Current IndexIterator object - */ - template <typename Callable> IndexIterator &iter(Callable cb) - { - for (int32_t batch = 0; batch < _shape.N; ++batch) - { - for (int32_t ch = 0; ch < _shape.C; ++ch) - { - for (int32_t row = 0; row < _shape.H; ++row) - { - for (int32_t col = 0; col < _shape.W; ++col) - { - cb(batch, ch, row, col); - } - } - } - } - - return (*this); - } - -private: - /** - * @brief Shape for feature - */ - const Shape _shape; -}; - -/** - * @brief Create an object of IndexIterator for feature - * @param[in] Shape reference of feature - * @return Created IndexIterator object - */ -static inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; } - -/** - * @brief Call a function iterated using IndexIterator of feature - * Overloaded operator<< - * @param[in] it An IndexIterator reference - * @param[in] cb A callback function - * @return created IndexIterator object - */ -template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb) -{ - return it.iter(cb); -} - -} // namespace feature -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FEATURE_INDEX_ITERATOR_H__ diff --git a/libs/misc/include/misc/feature/Object.h b/libs/misc/include/misc/feature/Object.h deleted file mode 
100644 index 7af0e28f4..000000000 --- a/libs/misc/include/misc/feature/Object.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Object.h - * @brief This file contains Object class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_FEATURE_OBJECT_H__ -#define __NNFW_MISC_FEATURE_OBJECT_H__ - -#include "misc/feature/Shape.h" -#include "misc/feature/Index.h" -#include "misc/feature/Reader.h" - -#include <vector> - -namespace nnfw -{ -namespace misc -{ -namespace feature -{ - -/** - * @brief Class to have information of the operand for feature - */ -template <typename T> class Object final : public Reader<T> -{ -public: - using Generator = std::function<T(const Shape &shape, const Index &index)>; - -public: - /** - * @brief Construct Object object with Shape of feature and set value used by Generator - * @param[in] shape Reference of Shape for feature - * @param[in] fn A function to set values of operand tensor - */ - Object(const Shape &shape, const Generator &fn) : _shape{shape} - { - _value.resize(_shape.C * _shape.H * _shape.W); - - for (int32_t ch = 0; ch < _shape.C; ++ch) - { - for (int32_t row = 0; row < _shape.H; ++row) - { - for (int32_t col = 0; col < _shape.W; ++col) - { - _value.at(offsetOf(ch, row, col)) = fn(_shape, Index{ch, row, col}); - } - } - } - } - -public: - /** - * @brief Get Shape of feature as the 
reference - * @return The reference of the width value - */ - const Shape &shape(void) const { return _shape; } - -public: - /** - * @brief Get the value used by three indexes - * @param[in] ch The depth index - * @param[in] row The height index - * @param[in] col The width index - * @return The value at the offset - */ - T at(uint32_t ch, uint32_t row, uint32_t col) const override - { - return _value.at(offsetOf(ch, row, col)); - } - -private: - /** - * @brief Get the offset value at three indexes - * @param[in] ch The depth index - * @param[in] row The height index - * @param[in] col The width index - * @return The offset value - */ - uint32_t offsetOf(uint32_t ch, uint32_t row, uint32_t col) const - { - return ch * _shape.H * _shape.W + row * _shape.W + col; - } - -private: - /** - * @brief Shape of operand - */ - Shape _shape; - /** - * @brief The tensor vector of operand - */ - std::vector<T> _value; -}; - -} // namespace feature -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FEATURE_OBJECT_H__ diff --git a/libs/misc/include/misc/feature/Reader.h b/libs/misc/include/misc/feature/Reader.h deleted file mode 100644 index b09209789..000000000 --- a/libs/misc/include/misc/feature/Reader.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Reader.h - * @brief This file contains Reader class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_FEATURE_READER_H__ -#define __NNFW_MISC_FEATURE_READER_H__ - -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace feature -{ - -/** - * @brief Class reads values of feature - * The interface class - */ -template <typename T> struct Reader -{ - /** - * @brief Destruct Reader object using default destructor - */ - virtual ~Reader() = default; - - /** - * @brief Get the value used by three indexes - * @param[in] ch The depth index - * @param[in] row The height index - * @param[in] col The width index - * @return The value at the offset - */ - virtual T at(uint32_t ch, uint32_t row, uint32_t col) const = 0; - /** - * @brief Get the value used by four indexes - * @param[in] batch The batch index - * @param[in] ch The depth index - * @param[in] row The height index - * @param[in] col The width index - * @return The value at the offset - */ - virtual T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const = 0; -}; - -} // namespace feature -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FEATURE_READER_H__ diff --git a/libs/misc/include/misc/feature/Shape.h b/libs/misc/include/misc/feature/Shape.h deleted file mode 100644 index 09881f58b..000000000 --- a/libs/misc/include/misc/feature/Shape.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Shape.h - * @brief This file contains Shape class for feature - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_FEATURE_SHAPE_H__ -#define __NNFW_MISC_FEATURE_SHAPE_H__ - -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace feature -{ - -/** - * @brief Structure to have values of dimensions for feature - */ -struct Shape -{ - int32_t N; /**< The batch value */ - int32_t C; /**< The depth value */ - int32_t H; /**< The height value */ - int32_t W; /**< The width value */ - - /** - * @brief Construct Shape object using default constrcutor - */ - Shape() = default; - /** - * @brief Construct Shape object with three values of dimensions - * @param[in] depth The depth value - * @param[in] height The height value - * @param[in] width The width value - */ - Shape(int32_t depth, int32_t height, int32_t width) : N{1}, C{depth}, H{height}, W{width} - { - // DO NOTHING - } - /** - * @brief Construct Shape object with four values of dimensions - * @param[in] batch The batch value - * @param[in] depth The depth value - * @param[in] height The height value - * @param[in] width The width value - */ - Shape(int32_t batch, int32_t depth, int32_t height, int32_t width) - : N{batch}, C{depth}, H{height}, W{width} - { - // DO NOTHING - } -}; - -} // namespace feature -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FEATURE_H__ diff --git a/libs/misc/include/misc/feature/TextFormatter.h b/libs/misc/include/misc/feature/TextFormatter.h deleted file mode 100644 index e053f1c61..000000000 --- a/libs/misc/include/misc/feature/TextFormatter.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file TextFormatter.h - * @brief This file contains TextFormatter class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_FEATURE_TEXT_FORMATTER_H__ -#define __NNFW_MISC_FEATURE_TEXT_FORMATTER_H__ - -#include "misc/feature/Shape.h" -#include "misc/feature/Reader.h" - -#include <ostream> -#include <iomanip> -#include <limits> - -namespace nnfw -{ -namespace misc -{ -namespace feature -{ - -/** - * @brief Class to print operand of feature to ostream in the given string format - */ -template <typename T> class TextFormatter -{ -public: - /** - * @brief Construct TextFormatter object with an operand's information. 
- * @param[in] shape The shape of an operand - * @param[in] data The data of an operand - */ - TextFormatter(const Shape &shape, const Reader<T> &data) : _shape(shape), _data(data) - { - // DO NOTHING - } - -public: - /** - * @brief Get Shape of feature as the lvalue reference - * @return Shape of feature - */ - const Shape &shape(void) const { return _shape; } - /** - * @brief Get Reader<T> that can read the data of an operand - * @return Reader<T> - */ - const Reader<T> &data(void) const { return _data; } - -private: - /** - * @brief Shape of feature - */ - const Shape &_shape; - /** - * @brief Reader<T> that can read the data of an operand - */ - const Reader<T> &_data; -}; - -/** - * @brief Print operand of feature - * @param[in] os Standard output stream - * @param[in] fmt TextFormatter to print information of an operand - * @return Standard output stream - */ -template <typename T> std::ostream &operator<<(std::ostream &os, const TextFormatter<T> &fmt) -{ - const auto &shape = fmt.shape(); - - for (uint32_t ch = 0; ch < shape.C; ++ch) - { - os << " Channel " << ch << ":" << std::endl; - for (uint32_t row = 0; row < shape.H; ++row) - { - os << " "; - for (uint32_t col = 0; col < shape.W; ++col) - { - const auto value = fmt.data().at(ch, row, col); - os << std::right; - os << std::fixed; - os << std::setw(std::numeric_limits<T>::digits10 + 2); - os << std::setprecision(5); - os << value; - os << " "; - } - os << std::endl; - } - } - - return os; -} - -} // namespace feature -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FEATURE_TEXT_FORMATTER_H__ diff --git a/libs/misc/include/misc/fp32.h b/libs/misc/include/misc/fp32.h deleted file mode 100644 index c310402ba..000000000 --- a/libs/misc/include/misc/fp32.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file fp32.h - * @brief This file contains functions to compare float values - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_FP32_H__ -#define __NNFW_MISC_FP32_H__ - -#include <cmath> -#include <cfloat> -#include <algorithm> -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace fp32 -{ - -/** - * @brief Get the difference between two float values as a relative value. - * @param[in] lhs A float value to be compared - * @param[in] rhs A float value to be compared - * @return A relative value of difference between two float values. 
- */ -inline float relative_diff(float lhs, float rhs) -{ - const auto diff = std::fabs(lhs - rhs); - const auto base = std::max(std::fabs(lhs), std::fabs(rhs)); - - return diff / base; -} - -/** - * @brief Verify that an obtained float value is equal to the expected float value - * by using FLT_EPSILON - * @param[in] expected An expected float value to be compared - * @param[in] obtained An obtained float value to be compared - * @param[in] tolerance A tolerance value - * @return @c true if both values are equal, otherwise @c false - */ -inline bool epsilon_equal(float expected, float obtained, uint32_t tolerance = 1) -{ - if (std::isnan(expected) && std::isnan(obtained)) - { - return true; - } - - // Let's use relative epsilon comparision - const auto diff = std::fabs(expected - obtained); - const auto max = std::max(std::fabs(expected), std::fabs(obtained)); - - return diff <= (max * FLT_EPSILON * tolerance); -} - -/** - * @brief Verify that an obtained float value is equal to the expected float value - * by comparing absolute tolerance value - * @param[in] expected An expected float value to be compared - * @param[in] obtained An obtained float value to be compared - * @param[in] tolerance A tolerance value - * @return @c true if both values are equal, otherwise @c false - */ -inline bool absolute_epsilon_equal(float expected, float obtained, float tolerance = 0.001) -{ - if (std::isnan(expected) && std::isnan(obtained)) - { - return true; - } - - // Let's use absolute epsilon comparision - const auto diff = std::fabs(expected - obtained); - - return diff <= tolerance; -} - -} // namespace fp32 -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FP32_H__ diff --git a/libs/misc/include/misc/kernel/IndexIterator.h b/libs/misc/include/misc/kernel/IndexIterator.h deleted file mode 100644 index 59e0f0095..000000000 --- a/libs/misc/include/misc/kernel/IndexIterator.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., 
Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file IndexIterator.h - * @brief This file contains IndexIterator class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_KERNEL_INDEX_ITERATOR_H__ -#define __NNFW_MISC_KERNEL_INDEX_ITERATOR_H__ - -#include "misc/kernel/Shape.h" - -namespace nnfw -{ -namespace misc -{ -namespace kernel -{ - -/** - * @brief Class to iterate Callable with Index of kernel - */ -class IndexIterator -{ -public: - /** - * @brief Construct IndexIterator object with Shape of kernel - * @param[in] shape Shape reference of feature - */ - IndexIterator(const Shape &shape) : _shape{shape} - { - // DO NOTHING - } - -public: - /** - * @brief Call a function iterated - * @param[in] cb A callback function - * @return Current IndexIterator object - */ - template <typename Callable> IndexIterator &iter(Callable cb) - { - for (int32_t nth = 0; nth < _shape.N; ++nth) - { - for (int32_t ch = 0; ch < _shape.C; ++ch) - { - for (int32_t row = 0; row < _shape.H; ++row) - { - for (int32_t col = 0; col < _shape.W; ++col) - { - cb(nth, ch, row, col); - } - } - } - } - - return (*this); - } - -private: - const Shape _shape; /**< Shape for kernel */ -}; - -/** - * @brief Create an object of IndexIterator for kernel - * @param[in] shape reference of feature - * @return Created IndexIterator object - */ -inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; } - -/** - * @brief Call a 
function iterated using IndexIterator of kernel - * Overloaded operator<< - * @param[in] it An IndexIterator reference - * @param[in] cb A callback function - * @return Created IndexIterator object - */ -template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb) -{ - return it.iter(cb); -} - -} // namespace kernel -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FEATURE_INDEX_ITERATOR_H__ diff --git a/libs/misc/include/misc/kernel/RandomObject.h b/libs/misc/include/misc/kernel/RandomObject.h deleted file mode 100644 index 4b58b0c7f..000000000 --- a/libs/misc/include/misc/kernel/RandomObject.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file RandomObject.h - * @brief This file contains RandomObject class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_KERNEL_RANDOM_OBJECT_H__ -#define __NNFW_MISC_KERNEL_RANDOM_OBJECT_H__ - -#include "misc/kernel/Shape.h" -#include "misc/kernel/Reader.h" - -#include <vector> - -namespace nnfw -{ -namespace misc -{ -namespace kernel -{ - -template <typename T> class RandomObject final : public Reader<T> -{ -public: - RandomObject(const Shape &shape) : _shape{shape} - { - const uint32_t size = _shape.N * _shape.C * _shape.H * _shape.W; - - // TODO Use random number - for (uint32_t off = 0; off < size; ++off) - { - _value.emplace_back(static_cast<float>(off)); - } - } - -public: - const Shape &shape(void) const { return _shape; } - -public: - T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override - { - uint32_t index = 0; - - index += nth * _shape.C * _shape.H * _shape.W; - index += ch * _shape.H * _shape.W; - index += row * _shape.W; - index += col; - - return _value.at(index); - } - -private: - const Shape _shape; - std::vector<T> _value; -}; - -} // namespace kernel -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_KERNEL_RANDOM_OBJECT_H__ diff --git a/libs/misc/include/misc/kernel/Reader.h b/libs/misc/include/misc/kernel/Reader.h deleted file mode 100644 index 019c809ee..000000000 --- a/libs/misc/include/misc/kernel/Reader.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Reader.h - * @brief This file contains Reader structure - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_KERNEL_READER_H__ -#define __NNFW_MISC_KERNEL_READER_H__ - -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace kernel -{ - -/** - * @brief Structure to Reader - */ -template <typename T> struct Reader -{ - /** - * @brief Destroy the Reader object as default - */ - virtual ~Reader() = default; - - /** - * @brief Get the value used by four indexes - * @param[in] nth The kernel index - * @param[in] ch The channel index - * @param[in] row The row index - * @param[in] col The column index - * @return The value at the offset - */ - virtual T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const = 0; -}; - -} // namespace kernel -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_KERNEL_READER_H__ diff --git a/libs/misc/include/misc/kernel/Shape.h b/libs/misc/include/misc/kernel/Shape.h deleted file mode 100644 index 27d6a8bf0..000000000 --- a/libs/misc/include/misc/kernel/Shape.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Shape.h - * @brief This file contains Shape structure - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_KERNEL_SHAPE_H__ -#define __NNFW_MISC_KERNEL_SHAPE_H__ - -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace kernel -{ - -/** - * @brief Structure to Shape - */ -struct Shape -{ - int32_t N; /**< The kernel index */ - int32_t C; /**< The channel index */ - int32_t H; /**< The height index */ - int32_t W; /**< The width index */ - - /** - * @brief Construct a new Shape object as default - */ - Shape() = default; - - /** - * @brief Construct a new Shape object with parameters - * @param[in] count The kernel index - * @param[in] depth The channel index - * @param[in] height The height index - * @param[in] width The width index - */ - Shape(int32_t count, int32_t depth, int32_t height, int32_t width) - : N{count}, C{depth}, H{height}, W{width} - { - // DO NOTHING - } -}; - -} // namespace kernel -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_KERNEL_SHAPE_H__ diff --git a/libs/misc/include/misc/matrix/IndexIterator.h b/libs/misc/include/misc/matrix/IndexIterator.h deleted file mode 100644 index 742ed3a65..000000000 --- a/libs/misc/include/misc/matrix/IndexIterator.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file IndexIterator.h - * @brief This file contains IndexIterator class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_MATRIX_INDEX_ITERATOR_H__ -#define __NNFW_MISC_MATRIX_INDEX_ITERATOR_H__ - -#include "misc/matrix/Shape.h" - -namespace nnfw -{ -namespace misc -{ -namespace matrix -{ - -/** - * @brief Class to iterate Callable with Index of matrix - */ -class IndexIterator -{ -public: - /** - * @brief Construct IndexIterator object with Shape of matrix - * @param[in] shape Shape reference of matrix - */ - IndexIterator(const Shape &shape) : _shape{shape} - { - // DO NOTHING - } - -public: - /** - * @brief Call a function iterated - * @param[in] cb A callback function - * @return Current IndexIterator object - */ - template <typename Callable> IndexIterator &iter(Callable cb) - { - for (uint32_t row = 0; row < _shape.H; ++row) - { - for (uint32_t col = 0; col < _shape.W; ++col) - { - cb(row, col); - } - } - - return (*this); - } - -private: - /** - * @brief Shape for matrix - */ - const Shape _shape; -}; - -/** - * @brief Create an object of IndexIterator for matrix - * @param[in] Shape reference of matrix - * @return Created IndexIterator object - */ -inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; } - -/** - * @brief Call a function iterated using IndexIterator of matrix - * Overloaded operator<< - * @param[in] it An IndexIterator reference - * @param[in] cb A callback function - * @return created IndexIterator object - */ -template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb) -{ - return it.iter(cb); -} - -} // namespace matrix -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_MATRIX_INDEX_ITERATOR_H__ diff --git a/libs/misc/include/misc/matrix/Reader.h b/libs/misc/include/misc/matrix/Reader.h deleted file mode 100644 index ea222c9d1..000000000 --- a/libs/misc/include/misc/matrix/Reader.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2018 Samsung 
Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Reader.h - * @brief This file contains Reader class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_MATRIX_READER_H__ -#define __NNFW_MISC_MATRIX_READER_H__ - -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace matrix -{ - -/** - * @brief Class reads values of matrix - * The interface class - */ -template <typename T> struct Reader -{ - /** - * @brief Destruct Reader object using default destructor - */ - virtual ~Reader() = default; - - /** - * @brief Get the value used by two indexes - * @param[in] row The height index - * @param[in] col The width index - * @return The value at the offset - */ - virtual T at(uint32_t row, uint32_t col) const = 0; -}; - -} // namespace matrix -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_MATRIX_READER_H__ diff --git a/libs/misc/include/misc/matrix/Shape.h b/libs/misc/include/misc/matrix/Shape.h deleted file mode 100644 index 8cbcc1e12..000000000 --- a/libs/misc/include/misc/matrix/Shape.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Shape.h - * @brief This file contains Shape class for matrix - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_MATRIX_SHAPE_H__ -#define __NNFW_MISC_MATRIX_SHAPE_H__ - -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace matrix -{ - -/** - * @brief Structure to have values of dimensions for matrix - */ -struct Shape -{ - int32_t H; /**< The height value */ - int32_t W; /**< The width value */ - - /** - * @brief Construct Shape object using default constrcutor - */ - Shape() = default; - - /** - * @brief Construct Shape object with two values of dimensions - * @param[in] height The height value - * @param[in] width The width value - */ - Shape(int32_t height, int32_t width) : H{height}, W{width} - { - // DO NOTHING - } -}; - -} // namespace matrix -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_MATRIX_SHAPE_H__ diff --git a/libs/misc/include/misc/tensor/Comparator.h b/libs/misc/include/misc/tensor/Comparator.h deleted file mode 100644 index 80f53043c..000000000 --- a/libs/misc/include/misc/tensor/Comparator.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Comparator.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::Comparator class - */ - -#ifndef __NNFW_MISC_TENSOR_COMPARATOR_H__ -#define __NNFW_MISC_TENSOR_COMPARATOR_H__ - -#include "misc/tensor/Index.h" -#include "misc/tensor/Shape.h" -#include "misc/tensor/Reader.h" -#include "misc/tensor/Diff.h" - -#include <functional> - -#include <vector> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Class to compare two tensors (expected and obtained to compare) - */ -class Comparator -{ -public: - /** - * @brief Construct a new @c Comparator object - * @param[in] fn Function that compares two float values - */ - Comparator(const std::function<bool(float lhs, float rhs)> &fn) : _compare_fn{fn} - { - // DO NOTHING - } - -public: - /** - * @brief Struct to observe comparison results - */ - struct Observer - { - /** - * @brief Get notification of comparison result at every index of two tensors - * @param[in] index Index of tensors compared - * @param[in] expected Expected value of element at @c index - * @param[in] obtained Obtained value of element at @c index - * @return N/A - */ - virtual void notify(const Index &index, float expected, float obtained) = 0; - }; - -public: - /** - * @brief Compare two tensors - * @param[in] shape Shape of two tensors - * @param[in] expected @c Reader<float> object that accesses expected tensor - * @param[in] obtained @c Reader<float> object that accesses obtained tensor - * @param[in] observer @c Observer notified of expected value and 
obtained value at every index - * @return @c std::vector<Diff<float>> containing information of failed comparison - */ - // NOTE Observer should live longer than comparator - std::vector<Diff<float>> compare(const Shape &shape, const Reader<float> &expected, - const Reader<float> &obtained, - Observer *observer = nullptr) const; - -private: - std::function<bool(float lhs, float rhs)> _compare_fn; -}; - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_COMPARATOR_H__ diff --git a/libs/misc/include/misc/tensor/Diff.h b/libs/misc/include/misc/tensor/Diff.h deleted file mode 100644 index c41a97987..000000000 --- a/libs/misc/include/misc/tensor/Diff.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Diff.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::Diff struct - */ - -#ifndef __NNFW_MISC_TENSOR_DIFF_H__ -#define __NNFW_MISC_TENSOR_DIFF_H__ - -#include "misc/tensor/Index.h" - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Struct to have information after comparing two elements of two tensors - */ -template <typename T> struct Diff -{ - Index index; /**< Index of elements in two tensors, which turn out to be different */ - - T expected; /**< Expected value of element of first tensor */ - T obtained; /**< Obtained value of element of second tensor */ - - /** - * @brief Construct a new @c Diff object - * @param[in] i Initial value of index - */ - Diff(const Index &i) : index(i) - { - // DO NOTHING - } - - /** - * @brief Construct a new @c Diff object - * @param[in] i Index value - * @param[in] e Expected value of element of first tensor - * @param[in] o Obtained value of element of second tensor - */ - Diff(const Index &i, const T &e, const T &o) : index(i), expected{e}, obtained{o} - { - // DO NOTHING - } -}; - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_DIFF_H__ diff --git a/libs/misc/include/misc/tensor/Index.h b/libs/misc/include/misc/tensor/Index.h deleted file mode 100644 index a08d7099e..000000000 --- a/libs/misc/include/misc/tensor/Index.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Index.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::Index struct - */ -#ifndef __NNFW_MISC_TENSOR_INDEX_H__ -#define __NNFW_MISC_TENSOR_INDEX_H__ - -#include <cstdint> -#include <cstddef> - -#include <vector> -#include <initializer_list> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Struct to represent index of each dimension of a tensor - */ -struct Index -{ -public: - /** - * @brief Construct a new @c Index object - * @param[in] rank Rank of a tensor - */ - Index(size_t rank) { _offsets.resize(rank); } - -public: - /** - * @brief Construct a new @c Index object - * @param[in] offsets Rank of a tensor of @c std::initializer_list<int32_t> type - */ - Index(std::initializer_list<int32_t> offsets) : _offsets{offsets} - { - // DO NOTHING - } - -public: - /** - * @brief Get the rank - * @return Rank that this @c Index object can handle - */ - size_t rank(void) const { return _offsets.size(); } - -public: - /** - * @brief Get the index n'th dimension - * @param[in] n Dimension - * @return index of n'th dimension - */ - int32_t at(size_t n) const { return _offsets.at(n); } - - /** - * @brief Get the reference of the index n'th dimension - * @param[in] n Dimension - * @return reference of index of n'th dimension - */ - int32_t &at(size_t n) { return _offsets.at(n); } - -private: - std::vector<int32_t> _offsets; -}; - -/** - * @brief Copy an @c Index with reversed order - * @param[in] origin @c Index object to copy - * @return an @c Index object with reversed order - * @note This is used to convert NNAPI tensor index to ARM tensor index or vice versa - */ -inline static Index copy_reverse(const Index &origin) -{ - size_t rank = origin.rank(); - Index target(rank); - for (int i = 0; i < rank; i++) - target.at(i) = origin.at(rank - 1 - i); - return target; -} - -} // namespace 
tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_INDEX_H__ diff --git a/libs/misc/include/misc/tensor/IndexEnumerator.h b/libs/misc/include/misc/tensor/IndexEnumerator.h deleted file mode 100644 index 4912ea289..000000000 --- a/libs/misc/include/misc/tensor/IndexEnumerator.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file IndexEnumerator.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::IndexEnumerator class - */ - -#ifndef __NNFW_MISC_TENSOR_INDEX_ENUMERATOR_H__ -#define __NNFW_MISC_TENSOR_INDEX_ENUMERATOR_H__ - -#include "misc/tensor/Shape.h" -#include "misc/tensor/Index.h" - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ -/** - * @brief Class to enumerate index of a tensor - * - */ -class IndexEnumerator -{ -public: - /** - * @brief Construct a new @c IndexEnumerator object - * @param[in] shape Shape of tensor of which index will be enumerate - */ - explicit IndexEnumerator(const Shape &shape) : _shape(shape), _index(shape.rank()), _cursor(0) - { - const size_t rank = _shape.rank(); - - for (size_t axis = 0; axis < rank; ++axis) - { - _index.at(axis) = 0; - } - - for (_cursor = 0; _cursor < rank; ++_cursor) - { - if (_index.at(_cursor) < _shape.dim(_cursor)) - { - break; - } - } - } - -public: - /** - * @brief Prevent constructing @c IndexEnumerator 
object by using R-value reference - */ - IndexEnumerator(IndexEnumerator &&) = delete; - /** - * @brief Prevent copy constructor - */ - IndexEnumerator(const IndexEnumerator &) = delete; - -public: - /** - * @brief Check if more enumeration is available - * @return @c true if more @c advance() is available, otherwise @c false - */ - bool valid(void) const { return _cursor < _shape.rank(); } - -public: - /** - * @brief Get the current index to enumerate - * @return Current index - */ - const Index &curr(void) const { return _index; } - -public: - /** - * @brief Advance index by +1 - */ - void advance(void) - { - const size_t rank = _shape.rank(); - - // Find axis to be updated - while ((_cursor < rank) && !(_index.at(_cursor) + 1 < _shape.dim(_cursor))) - { - ++_cursor; - } - - if (_cursor == rank) - { - return; - } - - // Update index - _index.at(_cursor) += 1; - - for (size_t axis = 0; axis < _cursor; ++axis) - { - _index.at(axis) = 0; - } - - // Update cursor - _cursor = 0; - } - -public: - const Shape _shape; //!< Shape to enumerate - -private: - size_t _cursor; - Index _index; -}; - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_INDEX_ENUMERATOR_H__ diff --git a/libs/misc/include/misc/tensor/IndexFormatter.h b/libs/misc/include/misc/tensor/IndexFormatter.h deleted file mode 100644 index 7ae34eec1..000000000 --- a/libs/misc/include/misc/tensor/IndexFormatter.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file IndexFormatter.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::IndexFormatter class - */ - -#ifndef __NNFW_MISC_TENSOR_INDEX_FORMATTER_H__ -#define __NNFW_MISC_TENSOR_INDEX_FORMATTER_H__ - -#include "misc/tensor/Index.h" - -#include <ostream> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Class to send @c Index object to output stream - */ -class IndexFormatter -{ -public: - /** - * @brief Construct a new @c IndexFormatter object - * @param[in] index index to be sent to output stream - */ - IndexFormatter(const nnfw::misc::tensor::Index &index) : _index(index) - { - // DO NOTHING - } - -public: - /** - * @brief Get an @c Index object - * @return @c Index object previously passed to the constructor - */ - const nnfw::misc::tensor::Index &index(void) const { return _index; } - -private: - const nnfw::misc::tensor::Index &_index; -}; - -/** - * @brief Send @c IndexFormatter object to output stream - * @param[in] os Output stream - * @param[in] fmt @c IndexFormatter object that is sent to output stream - * @return Output stream - */ -std::ostream &operator<<(std::ostream &os, const IndexFormatter &fmt); - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_INDEX_FORMATTER_H__ diff --git a/libs/misc/include/misc/tensor/IndexIterator.h b/libs/misc/include/misc/tensor/IndexIterator.h deleted file mode 100644 index f6428e19e..000000000 --- a/libs/misc/include/misc/tensor/IndexIterator.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * 
Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file IndexIterator.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::IndexIterator class and - * helper function and operator - */ -#ifndef __NNFW_MISC_TENSOR_INDEX_ITERATOR_H__ -#define __NNFW_MISC_TENSOR_INDEX_ITERATOR_H__ - -#include "misc/tensor/Shape.h" -#include "misc/tensor/Index.h" -#include "misc/tensor/IndexEnumerator.h" - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Class to iterate indexes available for given shape - */ -class IndexIterator -{ -public: - /** - * @brief Construct a new @c IndexIterator object - * @param[in] shape Shape of tensor of which index will be iterated - */ - IndexIterator(const Shape &shape) : _shape(shape) - { - // DO NOTHING - } - -public: - /** - * @brief Construct a new IndexIterator object using reference - * @param[in] IndexIterator @c IndexIterator object to move - */ - IndexIterator(IndexIterator &&) = default; - - /** - * @brief Prevent copy constructor - */ - IndexIterator(const IndexIterator &) = delete; - -public: - /** - * @brief Iterate all available indexes and run a function for each index - * @param[in] fn Function that requires an index as a parameter. 
- * @return @c IndexIterator object - */ - template <typename Callable> IndexIterator &iter(Callable fn) - { - for (IndexEnumerator e{_shape}; e.valid(); e.advance()) - { - fn(e.curr()); - } - - return (*this); - } - -private: - const Shape &_shape; -}; - -/** - * @brief Get an @c IndexItator object - * @param[in] shape Shape of tensor of which index will be iterated - * @return @c IndexIterator object - */ -inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; } - -/** - * @brief Iterate all indexes and apply a function - * @param[in] it @c IndexIterator object that is constructed with a tensor shape - * @param[in] cb A function that will receive a specific index. - * Inside the function, the index is used to manipulate tensor element. - * @return @c IndexIterator object - */ -template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb) -{ - return it.iter(cb); -} - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_INDEX_ITERATOR_H__ diff --git a/libs/misc/include/misc/tensor/NonIncreasingStride.h b/libs/misc/include/misc/tensor/NonIncreasingStride.h deleted file mode 100644 index e7ad0857b..000000000 --- a/libs/misc/include/misc/tensor/NonIncreasingStride.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file NonIncreasingStride.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::NonIncreasingStride class - */ -#ifndef __NNFW_MISC_TENSOR_NON_INCREASING_STRIDE_H__ -#define __NNFW_MISC_TENSOR_NON_INCREASING_STRIDE_H__ - -#include "misc/tensor/Shape.h" -#include "misc/tensor/Index.h" - -#include <vector> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Class to represent strides where stride[N-1] >= stride[N] holds for all N < rank - */ -class NonIncreasingStride -{ -public: - /** - * @brief Initialize the stride data using @c Shape - * @param[in] shape to build stride info - * @return N/A - */ - void init(const Shape &shape) - { - _stride.resize(shape.rank()); - _stride.at(shape.rank() - 1) = 1; - - for (uint32_t axis = shape.rank() - 1; axis > 0; --axis) - { - _stride.at(axis - 1) = _stride.at(axis) * shape.dim(axis); - } - } - -public: - /** - * @brief Get an stride value for specific axis - * @param[in] axis Axis of stride - * @return The value of stride - */ - uint32_t at(uint32_t axis) const { return _stride.at(axis); } - -public: - /** - * @brief Get the 1-D offset of specified index for n-D tensor - * @param index @c Index object - * @return 1-D offset of index - */ - uint32_t offset(const Index &index) const; - -private: - std::vector<uint32_t> _stride; -}; - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_NON_INCREASING_STRIDE_H__ diff --git a/libs/misc/include/misc/tensor/Object.h b/libs/misc/include/misc/tensor/Object.h deleted file mode 100644 index 83fbc0bd1..000000000 --- a/libs/misc/include/misc/tensor/Object.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Object.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::Object class - */ - -#ifndef __NNFW_MISC_TENSOR_OBJECT_H__ -#define __NNFW_MISC_TENSOR_OBJECT_H__ - -#include "misc/tensor/Shape.h" -#include "misc/tensor/Index.h" -#include "misc/tensor/IndexIterator.h" -#include "misc/tensor/NonIncreasingStride.h" -#include "misc/tensor/Reader.h" - -#include <vector> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Class to build a tensor using specific generator - * @tparam T Type of tensor element - */ - -template <typename T> class Object final : public Reader<T> -{ -public: - /** - * @brief Function to generate tensor element - */ - using Generator = std::function<T(const Shape &shape, const Index &index)>; - -public: - /** - * @brief Construct a new @c Object object - * @param[in] shape Tensor shape - * @param[in] fn Function to generate tensor elements - */ - Object(const Shape &shape, const Generator &fn) : _shape{shape} - { - // Set 'stride' - _stride.init(shape); - - // Pre-allocate buffer - _values.resize(_shape.dim(0) * _stride.at(0)); - - // Set 'value' - iterate(_shape) << - [this, &fn](const Index &index) { _values.at(_stride.offset(index)) = fn(_shape, index); }; - } - -public: - /** - * @brief Get reference of shape - * @return Reference of shape - */ - const Shape &shape(void) const { return _shape; } - -public: - /** - * @brief Get and element of tensor - * @param[in] index Index of a tensor element - * @return Value of tensor element - */ - T at(const Index 
&index) const override { return _values.at(_stride.offset(index)); } - -private: - Shape _shape; - NonIncreasingStride _stride; - -private: - std::vector<T> _values; -}; - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_FEATURE_OBJECT_H__ diff --git a/libs/misc/include/misc/tensor/Reader.h b/libs/misc/include/misc/tensor/Reader.h deleted file mode 100644 index 9175a913e..000000000 --- a/libs/misc/include/misc/tensor/Reader.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Reader.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::Reader struct - */ - -#ifndef __NNFW_MISC_TENSOR_READER_H__ -#define __NNFW_MISC_TENSOR_READER_H__ - -#include "misc/tensor/Index.h" - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Struct to read element of tensor - * @tparam T Type of elements in tensor - */ -template <typename T> struct Reader -{ - /** - * @brief Destroy the Reader object - */ - virtual ~Reader() = default; - - /** - * @brief Get an element of tensor - * @param[in] index Index specifying indexes of tensor element - * @return The value of specificed element - */ - virtual T at(const Index &index) const = 0; -}; - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_READER_H__ diff --git a/libs/misc/include/misc/tensor/Shape.h b/libs/misc/include/misc/tensor/Shape.h deleted file mode 100644 index 6e6c23502..000000000 --- a/libs/misc/include/misc/tensor/Shape.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Shape.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::Shape class - */ - -#ifndef __NNFW_MISC_TENSOR_SHAPE_H__ -#define __NNFW_MISC_TENSOR_SHAPE_H__ - -#include <cstdint> -#include <cstddef> -#include <deque> -#include <initializer_list> -#include <ostream> -#include <string> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Class to represent shape of a tensor - */ -class Shape -{ -public: - /** - * @brief Construct a new Shape object - * @param[in] rank Rank of a tensor - */ - Shape(size_t rank) { _dimensions.resize(rank); } - -public: - /** - * @brief Construct a new Shape object - * @param[in] dimensions @c initializer_list<int32_t> of dimensions of tensor - */ - Shape(const std::initializer_list<int32_t> &dimensions) : _dimensions{dimensions} - { - // DO NOTHING - } - - /** - * @brief Construct a new Shape object - * @param[in] origin @c Shape object to copy - */ - Shape(const Shape &origin) = default; - -public: - /** - * @brief Add dimension to the beginning - * @param[in] d dimension to add to the beginning - * @return N/A - */ - void prepend(int32_t d) { _dimensions.emplace_front(d); } - - /** - * @brief Add dimension to the back - * @param[in] d dimension to add to the back - * @return N/A - */ - void append(int32_t d) { _dimensions.emplace_back(d); } - -public: - /** - * @brief Get the rank of this shape - * @return rank - */ - size_t rank(void) const { return _dimensions.size(); } - -public: - /** - * @brief Get specific dimension - * @param[in] n Index of dimension - * @return n'th dimension - */ - int32_t dim(size_t n) const { return _dimensions.at(n); } - - /** - * @brief Get the reference of specific dimension - * @param[in] n Index of dimension - * @return Reference of n'th dimension - */ - int32_t &dim(size_t n) { return _dimensions.at(n); } - -public: - /** - * @brief Get the number of elements specified by this shape - * @return The number of elements - */ - 
size_t element_nums() const - { - size_t nums = 1; - for (auto d : _dimensions) - { - nums *= d; - } - return nums; - } - -private: - std::deque<int32_t> _dimensions; - -public: - /** - * @brief Get a @c Shape object after parsing string - * @param[in] s String of dimension list. Accepted format is numbers separated by comma. - * @return @c Shape object - */ - static Shape from(const std::string &s); -}; - -/** - * @brief Check equality of two @c Shape - * @param[in] Shape First shape to compare - * @param[in] Shape Second shape to compare - * @return @c true if both shapes are equal, otherwise @c false - */ -bool operator==(const Shape &, const Shape &); - -/** - * @brief Send @c Shape to @c std::ostream - * @param[in] os @c std::ostream to process this @c Shape - * @param[in] shape @c Shape to send to @c ostream - * @return Reference of @c std::ostream - */ -std::ostream &operator<<(std::ostream &os, const Shape &shape); - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_SHAPE_H__ diff --git a/libs/misc/include/misc/tensor/Zipper.h b/libs/misc/include/misc/tensor/Zipper.h deleted file mode 100644 index 8f0ec4ab6..000000000 --- a/libs/misc/include/misc/tensor/Zipper.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Zipper.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains nnfw::misc::tensor::Zipper class - */ - -#ifndef __NNFW_MISC_TENSOR_ZIPPER_H__ -#define __NNFW_MISC_TENSOR_ZIPPER_H__ - -#include "misc/tensor/Index.h" -#include "misc/tensor/IndexIterator.h" -#include "misc/tensor/Reader.h" - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -/** - * @brief Class to apply a function with three params: @c Index, elements of a tensor - * at passed index read by @c Reader objects - */ -template <typename T> class Zipper -{ -public: - /** - * @brief Construct a new @c Zipper object - * @param[in] shape Shape of @c lhs and @c rhs - * @param[in] lhs @c Reader object of a tensor - * @param[in] rhs @c Reader object of a tensor - */ - Zipper(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs) - : _shape{shape}, _lhs{lhs}, _rhs{rhs} - { - // DO NOTHING - } - -public: - /** - * @brief Apply @c cb to all elements of tensors. Elements of two tensors - * at passed @c index are read by @c lhs and @c rhs - * @param[in] cb Function to apply - * @return N/A - */ - template <typename Callable> void zip(Callable cb) const - { - iterate(_shape) << - [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); }; - } - -private: - const Shape &_shape; - const Reader<T> &_lhs; - const Reader<T> &_rhs; -}; - -/** - * @brief Apply @c cb by using @c lhs and @c rhs passed to the constructor of @c zipper - * @param[in] zipper @c Zipper object - * @param[in] cb Function to zpply using @c zip function - * @return @c zipper object after applying @c cb to @c zipper - */ -template <typename T, typename Callable> -const Zipper<T> &operator<<(const Zipper<T> &zipper, Callable cb) -{ - zipper.zip(cb); - return zipper; -} - -/** - * @brief Get @c Zipper object constructed using passed params - * @param shape Shape of @c lhs and @c rhs - * @param lhs @c Reader object of a tensor - * @param rhs @c Reader object of a tensor - * @return @c 
Zipper object - */ -template <typename T> Zipper<T> zip(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs) -{ - return Zipper<T>{shape, lhs, rhs}; -} - -} // namespace tensor -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_TENSOR_ZIPPER_H__ diff --git a/libs/misc/include/misc/vector.h b/libs/misc/include/misc/vector.h deleted file mode 100644 index 395b08912..000000000 --- a/libs/misc/include/misc/vector.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file vector.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains @c == operator to check equality of elements in two vectors - */ -#ifndef __NNFW_MISC_VECTOR_H__ -#define __NNFW_MISC_VECTOR_H__ - -#include <vector> - -/** - * @brief Compare elements of two vectors - * @tparam T Type of elements in vectors - * @param[in] lhs First vector to compare - * @param[in] rhs Second vector to compare - * @return @c true if all elements are equal, otherwise @c false. 
- */ -template <typename T> bool operator==(const std::vector<T> &lhs, const std::vector<T> &rhs) -{ - if (lhs.size() != rhs.size()) - { - return false; - } - - for (size_t ind = 0; ind < lhs.size(); ++ind) - { - if (lhs.at(ind) != rhs.at(ind)) - { - return false; - } - } - - return true; -} - -#endif // __NNFW_MISC_VECTOR_H__ diff --git a/libs/misc/include/misc/vector/Object.h b/libs/misc/include/misc/vector/Object.h deleted file mode 100644 index 65d4bc613..000000000 --- a/libs/misc/include/misc/vector/Object.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Object.h - * @brief This file contains Object class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_VECTOR_OBJECT_H__ -#define __NNFW_MISC_VECTOR_OBJECT_H__ - -#include "misc/vector/Reader.h" - -#include <vector> -#include <functional> - -namespace nnfw -{ -namespace misc -{ -namespace vector -{ - -/** - * @brief Class to have information of the operand for vector - */ -template <typename T> class Object final : public Reader<T> -{ -public: - using Generator = std::function<T(int32_t size, int32_t offset)>; - -public: - /** - * @brief Construct Object object with size of vector and set value used by Generator - * @param[in] size The size of vector - * @param[in] gen A function to set values of operand tensor - */ - Object(int32_t size, const Generator &gen) : _size{size} - { - _value.resize(_size); - - for (int32_t offset = 0; offset < size; ++offset) - { - _value.at(offset) = gen(size, offset); - } - } - -public: - /** - * @brief Get size of vector - * @return Size of vector - */ - int32_t size(void) const { return _size; } - -public: - /** - * @brief Get the value used by index - * @param[in] nth The vector index - * @return The value at the offset - */ - T at(uint32_t nth) const override { return _value.at(nth); } - -private: - /** - * @brief Size of vector - */ - const int32_t _size; - /** - * @brief The tensor vector of operand - */ - std::vector<T> _value; -}; - -} // namespace vector -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_VECTOR_OBJECT_H__ diff --git a/libs/misc/include/misc/vector/Reader.h b/libs/misc/include/misc/vector/Reader.h deleted file mode 100644 index eab4c427b..000000000 --- a/libs/misc/include/misc/vector/Reader.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Reader.h - * @brief This file contains Reader class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_MISC_VECTOR_READER_H__ -#define __NNFW_MISC_VECTOR_READER_H__ - -#include <cstdint> - -namespace nnfw -{ -namespace misc -{ -namespace vector -{ - -/** - * @brief Class reads values of vector - * The interface class - */ -template <typename T> struct Reader -{ - /** - * @brief Destruct Reader object using default destructor - */ - virtual ~Reader() = default; - - /** - * @brief Get the value used by the index - * @param[in] nth The vector index - * @return The value at the offset - */ - virtual T at(uint32_t nth) const = 0; -}; - -} // namespace vector -} // namespace misc -} // namespace nnfw - -#endif // __NNFW_MISC_VECTOR_READER_H__ diff --git a/libs/misc/src/environment.cpp b/libs/misc/src/environment.cpp deleted file mode 100644 index e39f18d62..000000000 --- a/libs/misc/src/environment.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <string.h> -#include <cstdlib> -#include <string> - -#include "misc/environment.h" - -namespace nnfw -{ -namespace misc -{ - -int get_env_int(const char *name, int defaultValue) -{ - const char *value = std::getenv(name); - if (value != nullptr) - return std::stoi(value); - return defaultValue; -} - -bool get_env_bool(const char *name, bool defaultValue) -{ - const char *value = std::getenv(name); - if (value != nullptr) - { - return std::stoi(value) != 0; - } - - return defaultValue; -} - -} // namespace misc -} // namespace nnfw - -namespace nnfw -{ -namespace misc -{ -namespace env -{ - -IntAccessor::IntAccessor(const std::string &tag) : _tag{tag} -{ - // DO NOTHING -} - -bool IntAccessor::access(int &out) const -{ - auto value = std::getenv(_tag.c_str()); - - if (value == nullptr) - { - return false; - } - - out = std::stoi(value); - return true; -} - -FloatAccessor::FloatAccessor(const std::string &tag) : _tag{tag} -{ - // DO NOTHING -} - -bool FloatAccessor::access(float &out) const -{ - auto value = std::getenv(_tag.c_str()); - - if (value == nullptr) - { - return false; - } - - out = std::stof(value); - return true; -} - -} // namespace env -} // namespace misc -} // namespace nnfw diff --git a/libs/misc/src/tensor/Comparator.cpp b/libs/misc/src/tensor/Comparator.cpp deleted file mode 100644 index 013c9eed2..000000000 --- a/libs/misc/src/tensor/Comparator.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "misc/tensor/Comparator.h" -#include "misc/tensor/Zipper.h" - -#include "misc/fp32.h" - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -std::vector<Diff<float>> Comparator::compare(const Shape &shape, const Reader<float> &expected, - const Reader<float> &obtained, - Observer *observer) const -{ - std::vector<Diff<float>> res; - - zip(shape, expected, obtained) << - [&](const Index &index, float expected_value, float 
obtained_value) { - const auto relative_diff = nnfw::misc::fp32::relative_diff(expected_value, obtained_value); - - if (!_compare_fn(expected_value, obtained_value)) - { - res.emplace_back(index, expected_value, obtained_value); - } - - // Update max_diff_index, if necessary - if (observer != nullptr) - { - observer->notify(index, expected_value, obtained_value); - } - }; - - return res; -} - -} // namespace tensor -} // namespace misc -} // namespace nnfw diff --git a/libs/misc/src/tensor/IndexFormatter.cpp b/libs/misc/src/tensor/IndexFormatter.cpp deleted file mode 100644 index c949db7a8..000000000 --- a/libs/misc/src/tensor/IndexFormatter.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "misc/tensor/IndexFormatter.h" - -#include <cassert> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -std::ostream &operator<<(std::ostream &os, const IndexFormatter &fmt) -{ - const auto rank = fmt.index().rank(); - - assert(rank > 0); - - os << fmt.index().at(0); - - if (rank > 1) - { - for (uint32_t axis = 1; axis < rank; ++axis) - { - os << ", " << fmt.index().at(axis); - } - } - - return os; -} - -} // namespace tensor -} // namespace misc -} // namespace nnfw diff --git a/libs/misc/src/tensor/NonIncreasingStride.cpp b/libs/misc/src/tensor/NonIncreasingStride.cpp deleted file mode 100644 index c51ad0324..000000000 --- a/libs/misc/src/tensor/NonIncreasingStride.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "misc/tensor/NonIncreasingStride.h" - -#include <cassert> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -uint32_t NonIncreasingStride::offset(const Index &index) const -{ - const size_t rank = _stride.size(); - - assert(index.rank() == rank); - - uint32_t offset = 0; - - for (size_t axis = 0; axis < rank; ++axis) - { - offset += _stride.at(axis) * index.at(axis); - } - - return offset; -} - -} // namespace tensor -} // namespace misc -} // namespace nnfw diff --git a/libs/misc/src/tensor/Shape.cpp b/libs/misc/src/tensor/Shape.cpp deleted file mode 100644 index 675695e8e..000000000 --- a/libs/misc/src/tensor/Shape.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "misc/tensor/Shape.h" - -#include <cassert> - -namespace nnfw -{ -namespace misc -{ -namespace tensor -{ - -bool operator==(const Shape &lhs, const Shape &rhs) -{ - if (lhs.rank() != rhs.rank()) - { - return false; - } - - for (size_t axis = 0; axis < lhs.rank(); ++axis) - { - if (lhs.dim(axis) != rhs.dim(axis)) - { - return false; - } - } - - return true; -} - -Shape Shape::from(const std::string &str) -{ - Shape shape(0); - - bool pending = false; - int value = 0; - - for (const char *cur = str.c_str(); true; ++cur) - { - if (*cur == ',' || *cur == '\0') - { - if (pending) - { - shape.append(value); - } - - if (*cur == '\0') - { - break; - } - - pending = false; - value = 0; - continue; - } - - assert(*cur >= '0' && *cur <= '9'); - - pending = true; - value *= 10; - value += *cur - '0'; - } - - return shape; -} - -std::ostream &operator<<(std::ostream &os, const Shape &shape) -{ - if (shape.rank() > 0) - { - os << shape.dim(0); - - for (uint32_t axis = 1; axis < shape.rank(); ++axis) - { - os << "," << shape.dim(axis); - } - } - - return os; -} - -} // namespace tensor -} // namespace misc -} // namespace nnfw diff --git a/libs/profiling/CMakeLists.txt b/libs/profiling/CMakeLists.txt deleted file mode 100644 index 7169508a1..000000000 --- a/libs/profiling/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -file(GLOB_RECURSE SOURCES "src/*.cpp") - -add_library(nnfw_lib_profiling STATIC ${SOURCES}) -set_property(TARGET nnfw_lib_profiling PROPERTY POSITION_INDEPENDENT_CODE ON) -target_include_directories(nnfw_lib_profiling PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) diff --git a/libs/profiling/include/profiling/profile_buffer.h b/libs/profiling/include/profiling/profile_buffer.h deleted file mode 100644 index 83cd3eb2b..000000000 --- a/libs/profiling/include/profiling/profile_buffer.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// NOTE To minimize diff with upstream tensorflow, disable clang-format -// clang-format off - -// NOTE This header is derived from the following file (in TensorFlow v1.12) -// 'externals/tensorflow/tensorflow/contrib/lite/profiling/profile_buffer.h -#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ -#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ - -#include <cstddef> -#include <cstdint> - -#include "profiling/time.h" - -namespace tflite { -namespace profiling { - -// A profiling event. -struct ProfileEvent { - // Describes the type of event. - // The event_metadata field may contain additional data for interpreting - // the event. 
- enum class EventType { - // Default event type, the metadata field has no special significance. - DEFAULT = 0, - // The event is an operator invocation and the event_metadata field is the - // index of operator node. - OPERATOR_INVOKE_EVENT = 1 - }; - - // Label of the event. This usually describes the event. - const char* tag; - // Timestamp in microseconds when the event began. - uint64_t begin_timestamp_us; - // Timestamp in microseconds when the event ended. - uint64_t end_timestamp_us; - // The field containing the type of event. This must be one of the event types - // in EventType. - EventType event_type; - // Extra data describing the details of the event. - uint32_t event_metadata; -}; -} // namespace profiling -} // namespace tflite - -#ifdef TFLITE_PROFILING_ENABLED - -#include <sys/time.h> -#include <vector> - -namespace tflite { -namespace profiling { -constexpr uint32_t kInvalidEventHandle = static_cast<uint32_t>(~0) - 1; - -// A ring buffer of profile events. -// This class is not thread safe. -class ProfileBuffer { - public: - ProfileBuffer(uint32_t max_num_entries, bool enabled) - : enabled_(enabled), current_index_(0), event_buffer_(max_num_entries) {} - - // Adds an event to the buffer with begin timestamp set to the current - // timestamp. Returns a handle to event that can be used to call EndEvent. If - // buffer is disabled this has no affect. - // The tag of the event should remain valid till the buffer is valid. 
- uint32_t BeginEvent(const char* tag, ProfileEvent::EventType event_type, - uint32_t event_metadata) { - if (!enabled_) { - return kInvalidEventHandle; - } - uint64_t timestamp = time::NowMicros(); - int index = current_index_ % event_buffer_.size(); - event_buffer_[index].tag = tag; - event_buffer_[index].event_type = event_type; - event_buffer_[index].event_metadata = event_metadata; - event_buffer_[index].begin_timestamp_us = timestamp; - event_buffer_[index].end_timestamp_us = 0; - current_index_++; - return index; - } - - // Sets the enabled state of buffer to |enabled| - void SetEnabled(bool enabled) { enabled_ = enabled; } - - // Sets the end timestamp for event for the handle to current time. - // If the buffer is disabled or previous event has been overwritten this - // operation has not effect. - void EndEvent(uint32_t event_handle) { - if (!enabled_ || event_handle == kInvalidEventHandle || - event_handle > current_index_) { - return; - } - const uint32_t max_size = event_buffer_.size(); - if (current_index_ > (max_size + event_handle)) { - // Ignore, buffer has already overflowed. - return; - } - - int event_index = event_handle % max_size; - event_buffer_[event_index].end_timestamp_us = time::NowMicros(); - } - - // Returns the size of the buffer. - size_t Size() const { - return (current_index_ >= event_buffer_.size()) ? event_buffer_.size() - : current_index_; - } - - // Resets the buffer. - void Reset() { - enabled_ = false; - current_index_ = 0; - } - - // Returns the profile event at the given index. If the index is invalid a - // nullptr is returned. The return event may get overwritten if more events - // are added to buffer. - const struct ProfileEvent* const At(int index) const { - size_t size = Size(); - if (index >= size) { - return nullptr; - } - const uint32_t max_size = event_buffer_.size(); - uint32_t start = - (current_index_ > max_size) ? 
current_index_ % max_size : max_size; - index = (index + start) % max_size; - return &event_buffer_[index]; - } - - private: - bool enabled_; - uint32_t current_index_; - std::vector<ProfileEvent> event_buffer_; -}; -} // namespace profiling -} // namespace tflite -#endif // TFLITE_PROFILING_ENABLED -#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ - -// clang-format on diff --git a/libs/profiling/include/profiling/profiler.h b/libs/profiling/include/profiling/profiler.h deleted file mode 100644 index 953042da3..000000000 --- a/libs/profiling/include/profiling/profiler.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -// NOTE To minimize diff with upstream tensorflow, disable clang-format -// clang-format off - -// NOTE This header is derived from the following file (in TensorFlow v1.12) -// 'externals/tensorflow/tensorflow/contrib/lite/profiling/profiler.h -#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ -#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ - -#include <vector> - -#include "profiling/profile_buffer.h" - -#ifdef TFLITE_PROFILING_ENABLED - -namespace tflite { -namespace profiling { -class ScopedProfile; -class ScopedOperatorProfile; - -// Controls whether profiling is enabled or disabled and collects profiles. -// TFLite is used on platforms that don't have posix threads, so the profiler is -// kept as simple as possible. It is designed to be used only on a single -// thread. -// -// Profiles are collected using Scoped*Profile objects that begin and end a -// profile event. -// An example usage is shown in the example below: -// -// Say Worker class has a DoWork method and we are interested in profiling -// the overall execution time for DoWork and time spent in Task1 and Task2 -// functions. -// -// class Worker { -// public: -// void DoWork() { -// ScopedProfile(&controller, "DoWork"); -// Task1(); -// Task2(); -// ..... -// } -// -// void Task1() { -// ScopedProfile(&controller, "Task1"); -// .... -// } -// -// void Task2() { -// ScopedProfile(&controller, "Task2"); -// } -// -// Profiler profiler; -// } -// -// We instrument the functions that need to be profiled. -// -// Profile can be collected by enable profiling and then getting profile -// events. -// -// void ProfileWorker() { -// Worker worker; -// worker.profiler.EnableProfiling(); -// worker.DoWork(); -// worker.profiler.DisableProfiling(); -// // Profiling is complete, extract profiles. 
-// auto profile_events = worker.profiler.GetProfiles(); -// } -// -// -class Profiler { - public: - Profiler() : buffer_(1024, false) {} - - void StartProfiling() { buffer_.SetEnabled(true); } - void StopProfiling() { buffer_.SetEnabled(false); } - void Reset() { buffer_.Reset(); } - std::vector<const ProfileEvent*> GetProfileEvents() { - std::vector<const ProfileEvent*> profile_events; - profile_events.reserve(buffer_.Size()); - for (size_t i = 0; i < buffer_.Size(); i++) { - profile_events.push_back(buffer_.At(i)); - } - return profile_events; - } - - private: - friend class ScopedProfile; - friend class ScopedOperatorProfile; - ProfileBuffer* GetProfileBuffer() { return &buffer_; } - ProfileBuffer buffer_; -}; - -class ScopedProfile { - public: - // Adds a profile event to profile that begins with the construction - // of object and ends when the object goes out of scope. - // The lifetime of tag should be at least the lifetime of profiler. - - ScopedProfile(Profiler* profiler, const char* tag) - : buffer_(nullptr), event_handle_(0) { - if (profiler) { - buffer_ = profiler->GetProfileBuffer(); - event_handle_ = - buffer_->BeginEvent(tag, ProfileEvent::EventType::DEFAULT, 0); - } - } - ~ScopedProfile() { - if (buffer_) { - buffer_->EndEvent(event_handle_); - } - } - - private: - ProfileBuffer* buffer_; - int32_t event_handle_; -}; - -class ScopedOperatorProfile { - public: - // Adds a profile event to profile that begins with the construction - // of object and ends when the object goes out of scope. - // The lifetime of tag should be at least the lifetime of profiler. 
- ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index) - : buffer_(nullptr), event_handle_(0) { - if (profiler) { - buffer_ = profiler->GetProfileBuffer(); - event_handle_ = buffer_->BeginEvent( - tag, ProfileEvent::EventType::OPERATOR_INVOKE_EVENT, node_index); - } - } - - ~ScopedOperatorProfile() { - if (buffer_) { - buffer_->EndEvent(event_handle_); - } - } - - private: - ProfileBuffer* buffer_; - int32_t event_handle_; -}; - -} // namespace profiling -} // namespace tflite - -#define VARNAME_UNIQ(name, ctr) name##ctr - -#define SCOPED_OPERATOR_PROFILE(profiler, node_index) \ - tflite::profiling::ScopedOperatorProfile VARNAME_UNIQ( \ - _profile_, __COUNTER__)((profiler), "OpInvoke", (node_index)) -#else - -namespace tflite { -namespace profiling { -// A noop version of profiler when profiling is disabled. -class Profiler { - public: - Profiler() {} - void StartProfiling() {} - void StopProfiling() {} - void Reset() {} - std::vector<const ProfileEvent*> GetProfileEvents() { return {}; } -}; -} // namespace profiling -} // namespace tflite - -#define SCOPED_OPERATOR_PROFILE(profiler, node_index) - -#endif // TFLITE_PROFILING_ENABLED - -#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ - -// clang-format on diff --git a/libs/profiling/include/profiling/profiling.h b/libs/profiling/include/profiling/profiling.h deleted file mode 100644 index ee0df1338..000000000 --- a/libs/profiling/include/profiling/profiling.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __NNFW_MISC_PROFILING_H__ -#define __NNFW_MISC_PROFILING_H__ - -#include <iostream> - -namespace tflite -{ -namespace profiling -{ -class Profiler; // forward declaration -} -} - -namespace profiling -{ - -class Context -{ -public: - Context() : _sync(false), _profiler(nullptr) {} - -public: - const bool &sync(void) const { return _sync; } - tflite::profiling::Profiler *getProfiler() { return _profiler; } - void setProfiler(tflite::profiling::Profiler *p) { _profiler = p; } - void setSync(void) { _sync = true; } - -private: - bool _sync; - tflite::profiling::Profiler *_profiler; - -public: - static Context &get(void) - { - static Context ctx{}; - return ctx; - } -}; - -} // namespace profiling -#endif // __NNFW_MISC_PROFILING_H__ diff --git a/libs/profiling/include/profiling/time.h b/libs/profiling/include/profiling/time.h deleted file mode 100644 index 4b194944d..000000000 --- a/libs/profiling/include/profiling/time.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// NOTE To minimize diff with upstream tensorflow, disable clang-format -// clang-format off - -// NOTE This header is derived from the following file (in TensorFlow v1.12) -// 'externals/tensorflow/tensorflow/contrib/lite/profiling/time.h -#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_ -#define TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_ - -#include <cstdint> - -namespace tflite { -namespace profiling { -namespace time { -uint64_t NowMicros(); -} // namespace time -} // namespace profiling -} // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_ - -// clang-format on diff --git a/libs/profiling/src/profiling/time.cpp b/libs/profiling/src/profiling/time.cpp deleted file mode 100644 index 92d8595f8..000000000 --- a/libs/profiling/src/profiling/time.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -// NOTE To minimize diff with upstream tensorflow, disable clang-format -// clang-format off - -// NOTE This header is derived from the following file (in TensorFlow v1.12) -// 'externals/tensorflow/tensorflow/contrib/lite/profiling/time.cpp -#include "profiling/time.h" - -#if defined(_MSC_VER) -#include <chrono> // NOLINT(build/c++11) -#else -#include <sys/time.h> -#endif - -namespace tflite { -namespace profiling { -namespace time { - -#if defined(_MSC_VER) - -uint64_t NowMicros() { - return std::chrono::duration_cast<std::chrono::microseconds>( - std::chrono::system_clock::now().time_since_epoch()) - .count(); -} - -#else - -uint64_t NowMicros() { - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec; -} - -#endif // defined(_MSC_VER) - -} // namespace time -} // namespace profiling -} // namespace tflite - -// clang-format on diff --git a/libs/tflite/CMakeLists.txt b/libs/tflite/CMakeLists.txt deleted file mode 100644 index e844d1c68..000000000 --- a/libs/tflite/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -file(GLOB_RECURSE SOURCES "src/*.cpp") -file(GLOB_RECURSE TESTS "src/*.test.cpp") -list(REMOVE_ITEM SOURCES ${TESTS}) - -add_library(nnfw_lib_tflite STATIC ${SOURCES}) -set_target_properties(nnfw_lib_tflite PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(nnfw_lib_tflite PUBLIC ${NNFW_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/include) -target_link_libraries(nnfw_lib_tflite tensorflow-lite ${LIB_PTHREAD} dl) -target_link_libraries(nnfw_lib_tflite nnfw_lib_misc) - -add_executable(nnfw_lib_tflite_test_TensorView src/TensorView.test.cpp) -target_link_libraries(nnfw_lib_tflite_test_TensorView nnfw_lib_tflite) diff --git a/libs/tflite/include/tflite/Assert.h b/libs/tflite/include/tflite/Assert.h deleted file mode 100644 index 6d12d37f6..000000000 --- a/libs/tflite/include/tflite/Assert.h +++ 
/dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Assert.h - * @brief This file contains helper function of assertion - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_ASSERT_H__ -#define __NNFW_TFLITE_ASSERT_H__ - -#include "tensorflow/contrib/lite/context.h" - -#include <sstream> - -#define STR_DETAIL(value) #value -#define STR(value) STR_DETAIL(value) - -#define TFLITE_ENSURE(exp) \ - { \ - const TfLiteStatus status = (exp); \ - \ - if (status != kTfLiteOk) \ - { \ - std::ostringstream ss; \ - ss << #exp << " failed (" << __FILE__ << ":" << __LINE__ << ")"; \ - throw std::runtime_error{ss.str()}; \ - } \ - } - -#endif // __NNFW_TFLITE_ASSERT_H__ diff --git a/libs/tflite/include/tflite/Diff.h b/libs/tflite/include/tflite/Diff.h deleted file mode 100644 index 15c672831..000000000 --- a/libs/tflite/include/tflite/Diff.h +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file Diff.h - * @brief This file contains classes for testing correctess of implementation - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_DIFF_H__ -#define __NNFW_TFLITE_DIFF_H__ - -#include "tensorflow/contrib/lite/interpreter.h" - -#include "misc/tensor/Index.h" -#include "misc/tensor/Diff.h" -#include "misc/tensor/Shape.h" -#include "misc/tensor/Comparator.h" - -#include "tflite/TensorView.h" - -#include <functional> -#include <vector> - -/** - * @brief Class to define TfLite interpreter match application - */ -class TfLiteInterpMatchApp -{ -public: - /** - * @brief Construct a new TfLiteInterpMatchApp object with Comparator - * @param[in] comparator Comparator object for tensor comparation - */ - TfLiteInterpMatchApp(const nnfw::misc::tensor::Comparator &comparator) - : _verbose{false}, _comparator(comparator) - { - // DO NOTHING - } - -public: - /** - * @brief Get reference verbose for debugging information - * @return Reference of verbose value - */ - int &verbose(void) { return _verbose; } - -private: - int _verbose; - -public: - /** - * @brief Run two interpreter and return the output matching - * @param[in] pure Interpreter object of expected(with TfLite) - * @param[in] nnapi Interpreter object of obtained(through NNAPI) - * @return @c true if two Interpreter results are same, otherwise @c false - */ - bool run(::tflite::Interpreter &pure, ::tflite::Interpreter &nnapi) const; - /** - * @brief Compare two TensorView values and return the match result - * @param[in] expected TensorView object to read expected 
values - * @param[in] obtained TensorView object to read obtained values - * @param[in] id Tensor ID value used for debug message - * @return @c true if two TensorView values are same, otherwise @c false - */ - template <typename T> - bool compareSingleTensorView(const nnfw::tflite::TensorView<T> &expected, - const nnfw::tflite::TensorView<T> &obtained, int id) const; - -private: - const nnfw::misc::tensor::Comparator &_comparator; -}; - -#include "tflite/interp/Builder.h" -#include "tflite/Quantization.h" - -#include <random> - -/** - * @brief Class to generate random values - */ -class RandomGenerator -{ -public: - /** - * @brief Construct a new RandomGenerator object - * @param[in] seed Random seed value - * @param[in] mean Mean value of normal random number generation - * @param[in] stddev Standard deviation of random number generation - * @param[in] quantization TfLiteQuantizationParams type to represent quantization value - * (not used yet) - */ - RandomGenerator(int seed, float mean, float stddev, - const TfLiteQuantizationParams quantization = make_default_quantization()) - : _rand{seed}, _dist{mean, stddev}, _quantization{quantization} - { - // DO NOTHING - } - -public: - /** - * @brief Generate random numbers for type T - * @param[in] s Shape value - * @param[in] i Index value - * @return Random generated value - * @note This is same as T generate(void) as two input parameters are not used - */ - template <typename T> - T generate(const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &) - { - return generate<T>(); - } - - /** - * @brief Generate random numbers for type T - * @return Random generated value - */ - template <typename T> T generate(void) { return _dist(_rand); } - -private: - std::minstd_rand _rand; - std::normal_distribution<float> _dist; - const TfLiteQuantizationParams _quantization; -}; - -template <> uint8_t RandomGenerator::generate<uint8_t>(void); - -/** - * @brief Structure for NNAPI correctness test - */ -struct 
RandomTestParam -{ - int verbose; //!< Verbosity of debug information - int tolerance; //!< Torlerance of value difference - int tensor_logging = 0; //!< Save logging to a file if not 0 - std::string log_path = ""; //!< Path of log file, meaningful only when tensor_logging is 1 -}; - -/** - * @brief Class to define Random test runner - */ -class RandomTestRunner -{ -public: - /** - * @brief Construct a new RandomTestRunner object - * @param[in] seed Random seed value - * @param[in] param RandomTestParam object for test runner - * @param[in] quantization TfLiteQuantizationParams type to represent quantization value - */ - RandomTestRunner(int seed, const RandomTestParam ¶m, - const TfLiteQuantizationParams quantization = make_default_quantization()) - : _randgen{seed, 0.0f, 2.0f, quantization}, _param{param} - { - // DO NOTHING - } - -public: - /** - * @brief Run the random test runner - * @param[in] builder Interpreter Builder used to run - * @return 0 if test succeeds, otherwise failure - */ - int run(const nnfw::tflite::Builder &builder); - -public: - /** - * @brief Get RandomGenerator reference - * @return RandomGenerator reference - */ - RandomGenerator &generator() { return _randgen; }; - -private: - RandomGenerator _randgen; - const RandomTestParam _param; - -public: - /** - * @brief Create a RandomTestRunner object - * @param[in] seed Random seed value - * @return RandomGenerator object - */ - static RandomTestRunner make(int seed); -}; - -#endif // __NNFW_TFLITE_DIFF_H__ diff --git a/libs/tflite/include/tflite/FeatureView.h b/libs/tflite/include/tflite/FeatureView.h deleted file mode 100644 index 06cbf4b14..000000000 --- a/libs/tflite/include/tflite/FeatureView.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file FeatureView.h - * @brief This file contains FeatureView class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_FEATURE_VIEW_H__ -#define __NNFW_TFLITE_FEATURE_VIEW_H__ - -#include "tensorflow/contrib/lite/interpreter.h" - -#include "tflite/InputIndex.h" -#include "tflite/OutputIndex.h" - -#include "misc/feature/Shape.h" -#include "misc/feature/Reader.h" - -namespace nnfw -{ -namespace tflite -{ - -template <typename T> class FeatureView; - -/** - * @brief Class to support reading element of float type feature - */ -template <> class FeatureView<float> : public nnfw::misc::feature::Reader<float> -{ -public: - /** - * @brief Construct a new FeatureView object - * @param[in] interp Interpreter to read from - * @param[in] index InputIndex index of input - */ - FeatureView(::tflite::Interpreter &interp, const InputIndex &index); - /** - * @brief Construct a new FeatureView object - * @param[in] interp Interpreter to read from - * @param[in] index OutputIndex index of output - */ - FeatureView(::tflite::Interpreter &interp, const OutputIndex &index); - -public: - /** - * @brief Get value of element using channel, row and column index - * @param[in] ch Channel index - * @param[in] row Row index - * @param[in] col Column index - * @return Value of element - */ - float at(uint32_t ch, uint32_t row, uint32_t col) const; - /** - * @brief Get reference of element using channel, row and column index - * @param[in] ch Channel index - * @param[in] row Row index - * @param[in] col Column index - * @return Reference of element - */ - 
float &at(uint32_t ch, uint32_t row, uint32_t col); - -private: - /** - * @brief Get offset of element from channel, row and column index - * @param[in] ch Channel index - * @param[in] row Row index - * @param[in] col Column index - * @return Offset of element - */ - uint32_t getElementOffset(uint32_t ch, uint32_t row, uint32_t col) const - { - uint32_t res = 0; - - // TensorFlow Lite assumes that NHWC ordering for tessor - res += row * _shape.W * _shape.C; - res += col * _shape.C; - res += ch; - - return res; - } - -private: - nnfw::misc::feature::Shape _shape; - float *_base; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_FEATURE_VIEW_H__ diff --git a/libs/tflite/include/tflite/InputIndex.h b/libs/tflite/include/tflite/InputIndex.h deleted file mode 100644 index f535b2626..000000000 --- a/libs/tflite/include/tflite/InputIndex.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file InputIndex.h - * @brief This file contains InputIndex class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_INPUT_INDEX_H__ -#define __NNFW_TFLITE_INPUT_INDEX_H__ - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Class to express index of input - */ -class InputIndex -{ -public: - /** - * @brief Construct a new InputIndex object with index value - * @param [in] index The value of index - */ - InputIndex(int index) : _index(index) - { - // DO NOTHING - } - -public: - /** - * @brief Get index value as int - * @return Index value as int - */ - int asInt(void) const { return _index; } - -private: - int _index; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_INPUT_INDEX_H__ diff --git a/libs/tflite/include/tflite/InterpreterSession.h b/libs/tflite/include/tflite/InterpreterSession.h deleted file mode 100644 index deaf05a7f..000000000 --- a/libs/tflite/include/tflite/InterpreterSession.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file InterpreterSession.h - * @brief This file contains InterpreterSession class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_INTERPRETER_SESSION_H__ -#define __NNFW_TFLITE_INTERPRETER_SESSION_H__ - -#include "Session.h" - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Class to define TfLite interpreter session which is inherited from Session class - */ -class InterpreterSession final : public Session -{ -public: - /** - * @brief Construct a InterpreterSession object with interpreter of TfLite - * @param[in] interp The TfLite interpreter pointer - */ - InterpreterSession(::tflite::Interpreter *interp) : _interp{interp} - { - // DO NOTHING - } - -public: - /** - * @brief Get TfLite interpreter pointer - * @return The TfLite interpreter - */ - ::tflite::Interpreter *interp(void) override { return _interp; } - -public: - /** - * @brief Prepare the TfLite interpreter session - * @return @c true if tensor preparation is successful, otherwise @c false - */ - bool prepare(void) override - { - _interp->UseNNAPI(false); - - if (kTfLiteOk != _interp->AllocateTensors()) - { - return false; - } - - return true; - } - - /** - * @brief Run the Invoke function of TfLite interpreter - * @return @c true if Invoke() is successful, otherwise @c false - */ - bool run(void) override - { - // Return true if Invoke returns kTfLiteOk - return kTfLiteOk == _interp->Invoke(); - } - - /** - * @brief Tear down TfLite interpreter session - * @return @c true always - */ - bool teardown(void) override - { - // Do NOTHING currently - return true; - } - -private: - ::tflite::Interpreter *const _interp; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_INTERPRETER_SESSION_H__ diff --git a/libs/tflite/include/tflite/NNAPISession.h b/libs/tflite/include/tflite/NNAPISession.h deleted file mode 100644 index b2a999d10..000000000 --- a/libs/tflite/include/tflite/NNAPISession.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2018 
Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file NNAPISession.h - * @brief This file contains NNAPISession class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_NNAPI_SESSION_H__ -#define __NNFW_TFLITE_NNAPI_SESSION_H__ - -#include "Session.h" -#include "tflite/ext/nnapi_delegate.h" - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Class to define NNAPI interpreter session which is inherited from Session class - */ -class NNAPISession final : public Session -{ -public: - /** - * @brief Construct a NNAPISession object with interpreter of TfLite - * @param[in] interp The TfLite interpreter pointer - * @note Invoke BuildGraph() of NNAPI delegate from Interpreter - */ - NNAPISession(::tflite::Interpreter *interp) : _interp{interp} - { - // Construct Graph from Interpreter - _delegate.BuildGraph(_interp); - } - -public: - /** - * @brief Get TfLite interpreter pointer - * @return The TfLite interpreter - */ - ::tflite::Interpreter *interp(void) override { return _interp; } - -public: - /** - * @brief Prepare the TfLite interpreter session - * @return @c true if tensor preparation is successful, otherwise @c false - */ - bool prepare(void) override - { - // Explicitly turn off T/F lite internal NNAPI delegation in order to use locally defined - // NNAPI delegation. 
- _interp->UseNNAPI(false); - - if (kTfLiteOk != _interp->AllocateTensors()) - { - return false; - } - - return true; - } - - /** - * @brief Run the Invoke function of NNAPI delegate - * @return @c true if Invoke() is successful, otherwise @c false - */ - bool run(void) override { return kTfLiteOk == _delegate.Invoke(_interp); } - - /** - * @brief Tear down TfLite interpreter session - * @return @c true always - */ - bool teardown(void) override - { - // DO NOTHING - return true; - } - -private: - ::tflite::Interpreter *const _interp; - nnfw::tflite::NNAPIDelegate _delegate; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_NNAPI_SESSION_H__ diff --git a/libs/tflite/include/tflite/OutputIndex.h b/libs/tflite/include/tflite/OutputIndex.h deleted file mode 100644 index dd1ca8d44..000000000 --- a/libs/tflite/include/tflite/OutputIndex.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file OutputIndex.h - * @brief This file contains OutputIndex class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_OUTPUT_INDEX_H__ -#define __NNFW_TFLITE_OUTPUT_INDEX_H__ - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Class to define OutputIndex - */ -class OutputIndex -{ -public: - /** - * @brief Construct a OutputIndex object with index value - * @param[in] index The value of index - */ - OutputIndex(int index) : _index(index) - { - // DO NOTHING - } - -public: - /** - * @brief Get index value as int - * @return Index valuel as int - */ - int asInt(void) const { return _index; } - -private: - int _index; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_OUTPUT_INDEX_H__ diff --git a/libs/tflite/include/tflite/Quantization.h b/libs/tflite/include/tflite/Quantization.h deleted file mode 100644 index 4a8a0f1ac..000000000 --- a/libs/tflite/include/tflite/Quantization.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Quantization.h - * @brief This file contains BitwiseIntToFloat union and quantization related - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_QUANTIZATION_H__ -#define __NNFW_TFLITE_QUANTIZATION_H__ - -/** - * @brief Union to provide bitwise conversion of integer and float - */ -union BitwiseIntToFloat { - int i; - float f; -}; - -static const float FLOAT_NEAREST_TO_1 = BitwiseIntToFloat{0x3f7fffff}.f; - -#include "tensorflow/contrib/lite/context.h" - -/** - * @brief Get TfLiteQuantizationParams object with default values - * @return TfLiteQuantizationParams object - */ -TfLiteQuantizationParams make_default_quantization(void); - -#endif // __NNFW_TFLITE_QUANTIZATION_H__ diff --git a/libs/tflite/include/tflite/Session.h b/libs/tflite/include/tflite/Session.h deleted file mode 100644 index 4f2e5c54d..000000000 --- a/libs/tflite/include/tflite/Session.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file Session.h - * @brief This file contains Session class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_SESSION_H__ -#define __NNFW_TFLITE_SESSION_H__ - -#include <tensorflow/contrib/lite/interpreter.h> - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Structure to provide interface methods of interpreter session - */ -struct Session -{ - /** - * @brief Destruct Session object using default destructor - */ - virtual ~Session() = default; - - /** - * @brief Get the Interpreter object pointer - * @return The Interpreter object pointer - */ - virtual ::tflite::Interpreter *interp(void) = 0; - - /** - * @brief Prepare the session - * @return @c true if prepare method succeeded, otherwise @c false - */ - virtual bool prepare(void) = 0; - /** - * @brief Run the session - * @return @c true if run method succeeded, otherwise @c false - */ - virtual bool run(void) = 0; - /** - * @brief Teardown(release) the session - * @return @c true if teardown method succeeded, otherwise @c false - */ - virtual bool teardown(void) = 0; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_INTERP_SESSION_H__ diff --git a/libs/tflite/include/tflite/TensorLogger.h b/libs/tflite/include/tflite/TensorLogger.h deleted file mode 100644 index e56a76b58..000000000 --- a/libs/tflite/include/tflite/TensorLogger.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file TensorLogger.h - * @brief This file contains TensorLogger class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_TENSOR_LOGGER_H__ -#define __NNFW_TFLITE_TENSOR_LOGGER_H__ - -#include "misc/tensor/IndexIterator.h" -#include "tflite/TensorView.h" - -#include <tensorflow/contrib/lite/interpreter.h> -#include <tensorflow/contrib/lite/context.h> -#include <fstream> -#include <iomanip> - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Class to write input and output value / shape into a file in python form - * @note This is a utility to write input and output value / shape into a file in python form.\n - * any python app can load this value by running the python code below:\n - * exec(open(filename).read())\n - * generated python code looks like the following: \n - * tensor_shape_gen = []\n - * tensor_value_gen = []\n\n - * tensor_shape_gen.append("{2, 1, 2}")\n - * tensor_value_gen.append([1, 2, 3, 4])\n\n - * tensor_shape_gen.append("{2}")\n - * tensor_value_gen.append([1, 2])\n\n - * tensor_shape_gen.append("{2, 1, 2}")\n - * tensor_value_gen.append([1, 4, 3, 8])\n - */ -class TensorLogger -{ -private: - std::ofstream _outfile; - -public: - /** - * @brief Get TensorLogger instance - * @return The TensorLogger instance - */ - static TensorLogger &instance() - { - static TensorLogger instance; - return instance; - } - - /** - * @brief Save the tensor details to file from interpreter - * @param[in] path The file path to save - * @param[in] interp The TfLite interpreter - */ - void save(const std::string &path, ::tflite::Interpreter &interp) - { - open(path); - - int log_index = 0; - for (const auto id : interp.inputs()) - { - _outfile << "# input tensors" << std::endl; - printTensor(interp, id, log_index++); - } - for (const auto id : interp.outputs()) - { - _outfile << "# output tensors" << std::endl; - printTensor(interp, 
id, log_index++); - } - close(); - } - -private: - void open(const std::string &path) - { - if (!_outfile.is_open()) - _outfile.open(path, std::ios_base::out); - - _outfile << "# ------ file: " << path << " ------" << std::endl - << "tensor_shape_gen = []" << std::endl - << "tensor_value_gen = []" << std::endl - << std::endl; - } - - void printTensor(::tflite::Interpreter &interp, const int id, const int log_index) - { - const TfLiteTensor *tensor = interp.tensor(id); - - _outfile << "# tensor name: " << tensor->name << std::endl; - _outfile << "# tflite::interpreter.tensor(" << id << ") -> " - "tensor_value_gen[" - << log_index << "]" << std::endl; - - if (tensor->type == kTfLiteInt32) - { - printTensorShape(tensor); - printTensorValue<int32_t>(tensor, tensor->data.i32); - } - else if (interp.tensor(id)->type == kTfLiteUInt8) - { - printTensorShape(tensor); - printTensorValue<uint8_t>(tensor, tensor->data.uint8); - } - else if (tensor->type == kTfLiteFloat32) - { - printTensorShape(tensor); - printTensorValue<float>(tensor, tensor->data.f); - } - } - - void printTensorShape(const TfLiteTensor *tensor) - { - _outfile << "tensor_shape_gen.append('{"; - - size_t r = 0; - for (; r < tensor->dims->size - 1; r++) - { - _outfile << tensor->dims->data[r] << ", "; - } - _outfile << tensor->dims->data[r]; - - _outfile << "}')" << std::endl; - } - - template <typename T> void printTensorValue(const TfLiteTensor *tensor, T *tensor_data_ptr) - { - _outfile << "tensor_value_gen.append(["; - - _outfile << std::fixed << std::setprecision(10); - - const T *end = reinterpret_cast<const T *>(tensor->data.raw_const + tensor->bytes); - for (T *ptr = tensor_data_ptr; ptr < end; ptr++) - _outfile << *ptr << ", "; - - _outfile << "])" << std::endl << std::endl; - } - - void close() - { - _outfile << "# --------- tensor shape and value defined above ---------" << std::endl; - _outfile.close(); - } -}; - -} // namespace tflite -} // namespace nnfw - -#endif // 
__NNFW_TFLITE_TENSOR_LOGGER_H__ diff --git a/libs/tflite/include/tflite/TensorShapeUtils.h b/libs/tflite/include/tflite/TensorShapeUtils.h deleted file mode 100644 index ba8687413..000000000 --- a/libs/tflite/include/tflite/TensorShapeUtils.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file TensorShapeUtils.h - * @brief This file contains utilities function of tensor shape - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__ -#define __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__ - -#include "misc/tensor/Shape.h" - -#include <vector> - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Converts tensor::Shape into a vector - * @param[in] shape The tensor shape to be converted - * @return vector value of given shape object - */ -static inline std::vector<int32_t> as_dims(const nnfw::misc::tensor::Shape &shape) -{ - std::vector<int32_t> dims; - - for (uint32_t axis = 0; axis < shape.rank(); ++axis) - { - dims.emplace_back(shape.dim(axis)); - } - - return dims; -} - -/** - * @brief Broadcasts between two given shapes - * @param[in] lhs_shape The left hand side shape - * @param[in] rhs_shape The right hand side shape - * @return The broadcasted shape - */ -nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape, - const nnfw::misc::tensor::Shape &rhs_shape); - -} // namespace tflite -} // 
namespace nnfw - -#endif // __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__ diff --git a/libs/tflite/include/tflite/TensorUtils.h b/libs/tflite/include/tflite/TensorUtils.h deleted file mode 100644 index 6266c5dff..000000000 --- a/libs/tflite/include/tflite/TensorUtils.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file TensorUtils.h - * @brief This file contains utilities function - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_TENSOR_UTILS_H__ -#define __NNFW_TFLITE_TENSOR_UTILS_H__ - -#include <tensorflow/contrib/lite/context.h> - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Get @c true if tensor type is kTfLiteFloat32, otherwise @c false - * @param[in] tensor The tensor object to be compared - * @return @c true if tensor type is kTfLiteFloat32, otherwise @c false - */ -inline bool isFloatTensor(const TfLiteTensor *tensor) { return tensor->type == kTfLiteFloat32; } - -/** - * @brief Get @c true if tensor is 4-D tensor and the first dimension length is 1, - * otherwise @c false - * @param[in] tensor The tensor object to be compared - * @return @c true if tensor is 4-D tensor and the first dimension length is 1, otherwise @c false - */ -inline bool isFeatureTensor(const TfLiteTensor *tensor) -{ - return (tensor->dims->size == 4) && (tensor->dims->data[0] == 1); -} - -} // namespace tflite -} // namespace nnfw - -#endif 
// __NNFW_TFLITE_TENSOR_UTILS_H__ diff --git a/libs/tflite/include/tflite/TensorView.h b/libs/tflite/include/tflite/TensorView.h deleted file mode 100644 index 79c754c78..000000000 --- a/libs/tflite/include/tflite/TensorView.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file TensorView.h - * @brief This file contains TensorView class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_TENSOR_VIEW_H__ -#define __NNFW_TFLITE_TENSOR_VIEW_H__ - -#include "tensorflow/contrib/lite/interpreter.h" - -#include "misc/tensor/Shape.h" -#include "misc/tensor/Index.h" -#include "misc/tensor/Reader.h" -#include "misc/tensor/NonIncreasingStride.h" - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Class to define TensorView which is inherited from nnfw::misc::tensor::Reader<T> class - */ -template <typename T> class TensorView final : public nnfw::misc::tensor::Reader<T> -{ -public: - /** - * @brief Construct a TensorView object with base and shape informations - * @param[in] shape The shape of a tensor - * @param[in] base The base address of a tensor - */ - TensorView(const nnfw::misc::tensor::Shape &shape, T *base) : _shape{shape}, _base{base} - { - // Set 'stride' - _stride.init(_shape); - } - -public: - /** - * @brief Get shape of tensor - * @return Reference of shape - */ - const nnfw::misc::tensor::Shape &shape(void) const { 
return _shape; } - -public: - /** - * @brief Get value of tensor index - * @param[in] index The tensor index - * @return The value at the index - */ - T at(const nnfw::misc::tensor::Index &index) const override - { - const auto offset = _stride.offset(index); - return *(_base + offset); - } - -public: - /** - * @brief Get reference value of tensor index - * @param[in] index The tensor index - * @return The reference value at the index - */ - T &at(const nnfw::misc::tensor::Index &index) - { - const auto offset = _stride.offset(index); - return *(_base + offset); - } - -private: - nnfw::misc::tensor::Shape _shape; /**< The tensor shape */ - -public: - T *_base; /**< The base address of tensor */ - nnfw::misc::tensor::NonIncreasingStride _stride; /**< The NonIncreasingStride object */ - -public: - // TODO Introduce Operand ID class - /** - * @brief Create TensorView object using given parameters - * @param[in] interp The TfLite interpreter - * @param[in] tensor_index The tensor index - * @return The new TensorView<T> object - */ - static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index) - { - auto tensor_ptr = interp.tensor(tensor_index); - - // Set 'shape' - nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size); - - for (uint32_t axis = 0; axis < shape.rank(); ++axis) - { - shape.dim(axis) = tensor_ptr->dims->data[axis]; - } - - return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index)); - } -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_TENSOR_VIEW_H__ diff --git a/libs/tflite/include/tflite/ext/kernels/Abs.h b/libs/tflite/include/tflite/ext/kernels/Abs.h deleted file mode 100644 index 74e4aa658..000000000 --- a/libs/tflite/include/tflite/ext/kernels/Abs.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
/**
 * @file     Abs.h
 * @brief    This file declares the hook functions of the Abs custom kernel
 */

#ifndef __NNFW_TFLITE_EXT_KERNELS_ABS_H__
#define __NNFW_TFLITE_EXT_KERNELS_ABS_H__

#include "tensorflow/contrib/lite/context.h"

namespace nnfw
{
namespace tflite
{
namespace custom
{
namespace Abs
{

// The four functions below are only declared here; their definitions live elsewhere.

/**
 * @brief Init hook of the Abs kernel
 * @param[in] context The TfLite context
 * @param[in] buffer  The buffer passed to the hook
 * @param[in] length  The buffer length
 * @return An untyped pointer (NOTE(review): presumably the kernel's user data - confirm
 *         against the definition)
 */
void *InitAbs(TfLiteContext *context, const char *buffer, size_t length);

/**
 * @brief Free hook of the Abs kernel
 * @param[in] context The TfLite context
 * @param[in] buffer  Untyped pointer (NOTE(review): presumably the value returned by InitAbs -
 *                    confirm against the definition)
 */
void FreeAbs(TfLiteContext *context, void *buffer);

/**
 * @brief Prepare hook of the Abs kernel
 * @param[in] context The TfLite context
 * @param[in] node    The node being prepared
 * @return The TfLite status
 */
TfLiteStatus PrepareAbs(TfLiteContext *context, TfLiteNode *node);

/**
 * @brief Eval hook of the Abs kernel
 * @param[in] context The TfLite context
 * @param[in] node    The node being evaluated
 * @return The TfLite status
 */
TfLiteStatus EvalAbs(TfLiteContext *context, TfLiteNode *node);

} // namespace Abs
} // namespace custom
} // namespace tflite
} // namespace nnfw

#endif // __NNFW_TFLITE_EXT_KERNELS_ABS_H__
- */ - -/** - * @file CustomOps.h - * @brief This file contains registration of custom operands - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__ -#define __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__ - -#include "tensorflow/contrib/lite/context.h" -#include "tflite/ext/kernels/TensorFlowMax.h" -#include "tflite/ext/kernels/SquaredDifference.h" -#include "tflite/ext/kernels/TensorFlowSum.h" -#include "tflite/ext/kernels/Abs.h" - -namespace nnfw -{ -namespace tflite -{ -namespace custom -{ - -#define REGISTER_FUNCTION(Name) \ - TfLiteRegistration *Register_##Name(void) \ - { \ - static TfLiteRegistration r = { \ - Name::Init##Name, Name::Free##Name, Name::Prepare##Name, Name::Eval##Name, \ - }; \ - r.custom_name = #Name; \ - return &r; \ - } - -REGISTER_FUNCTION(TensorFlowMax) -REGISTER_FUNCTION(SquaredDifference) -REGISTER_FUNCTION(TensorFlowSum) -REGISTER_FUNCTION(Abs) - -#undef REGISTER_FUNCTION - -} // namespace custom -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__ diff --git a/libs/tflite/include/tflite/ext/kernels/SquaredDifference.h b/libs/tflite/include/tflite/ext/kernels/SquaredDifference.h deleted file mode 100644 index 492523c02..000000000 --- a/libs/tflite/include/tflite/ext/kernels/SquaredDifference.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
/**
 * @file     SquaredDifference.h
 * @brief    This file contains SquaredDifference namespace and SquaredDifference function
 *           declarations
 * @ingroup  COM_AI_RUNTIME
 */

#ifndef __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__
#define __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__

#include "tensorflow/contrib/lite/context.h"

namespace nnfw
{
namespace tflite
{
namespace custom
{
namespace SquaredDifference
{

/**
 * @brief Initialize the SquaredDifference operator using the contents of buffer
 * @param[in] context The TfLite context
 * @param[in] buffer  The buffer with contents
 * @param[in] length  The buffer length
 * @return The void pointer for user data
 */
void *InitSquaredDifference(TfLiteContext *context, const char *buffer, size_t length);

/**
 * @brief Release any memory that might have been allocated via 'InitSquaredDifference'
 * @param[in] context The TfLite context
 * @param[in] buffer  The buffer with contents
 * @return N/A
 */
void FreeSquaredDifference(TfLiteContext *context, void *buffer);

/**
 * @brief Prepare the SquaredDifference operator for execution
 * @param[in] context The TfLite context
 * @param[in] node    The operator node
 * @return The TfLite status
 */
TfLiteStatus PrepareSquaredDifference(TfLiteContext *context, TfLiteNode *node);

/**
 * @brief Evaluate the SquaredDifference operator
 * @param[in] context The TfLite context
 * @param[in] node    The operator node
 * @return The TfLite status
 */
TfLiteStatus EvalSquaredDifference(TfLiteContext *context, TfLiteNode *node);

} // namespace SquaredDifference
} // namespace custom
} // namespace tflite
} // namespace nnfw

#endif // __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__
/**
 * @file     TensorFlowMax.h
 * @brief    This file contains TensorFlowMax namespace and TensorFlowMax function declarations
 * @ingroup  COM_AI_RUNTIME
 */

#ifndef __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__
#define __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__

#include "tensorflow/contrib/lite/context.h"

namespace nnfw
{
namespace tflite
{
namespace custom
{
namespace TensorFlowMax
{

/**
 * @brief Initialize the TensorFlowMax operator using the contents of buffer
 * @param[in] context The TfLite context
 * @param[in] buffer  The buffer with contents
 * @param[in] length  The buffer length
 * @return The void pointer for user data
 */
void *InitTensorFlowMax(TfLiteContext *context, const char *buffer, size_t length);

/**
 * @brief Release any memory that might have been allocated via 'InitTensorFlowMax'
 * @param[in] context The TfLite context
 * @param[in] buffer  The buffer with contents
 * @return N/A
 */
void FreeTensorFlowMax(TfLiteContext *context, void *buffer);

/**
 * @brief Prepare the TensorFlowMax operator for execution
 * @param[in] context The TfLite context
 * @param[in] node    The operator node
 * @return The TfLite status
 */
TfLiteStatus PrepareTensorFlowMax(TfLiteContext *context, TfLiteNode *node);

/**
 * @brief Evaluate the TensorFlowMax operator
 * @param[in] context The TfLite context
 * @param[in] node    The operator node
 * @return The TfLite status
 */
TfLiteStatus EvalTensorFlowMax(TfLiteContext *context, TfLiteNode *node);

} // namespace TensorFlowMax
} // namespace custom
} // namespace tflite
} // namespace nnfw

#endif // __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__
/**
 * @file     TensorFlowSum.h
 * @brief    This file declares the hook functions of the TensorFlowSum custom kernel
 */

#ifndef __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__
#define __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__

#include "tensorflow/contrib/lite/context.h"

namespace nnfw
{
namespace tflite
{
namespace custom
{
namespace TensorFlowSum
{

// The four functions below are only declared here; their definitions live elsewhere.

/**
 * @brief Init hook of the TensorFlowSum kernel
 * @param[in] context The TfLite context
 * @param[in] buffer  The buffer passed to the hook
 * @param[in] length  The buffer length
 * @return An untyped pointer (NOTE(review): presumably the kernel's user data - confirm
 *         against the definition)
 */
void *InitTensorFlowSum(TfLiteContext *context, const char *buffer, size_t length);

/**
 * @brief Free hook of the TensorFlowSum kernel
 * @param[in] context The TfLite context
 * @param[in] buffer  Untyped pointer (NOTE(review): presumably the value returned by
 *                    InitTensorFlowSum - confirm against the definition)
 */
void FreeTensorFlowSum(TfLiteContext *context, void *buffer);

/**
 * @brief Prepare hook of the TensorFlowSum kernel
 * @param[in] context The TfLite context
 * @param[in] node    The node being prepared
 * @return The TfLite status
 */
TfLiteStatus PrepareTensorFlowSum(TfLiteContext *context, TfLiteNode *node);

/**
 * @brief Eval hook of the TensorFlowSum kernel
 * @param[in] context The TfLite context
 * @param[in] node    The node being evaluated
 * @return The TfLite status
 */
TfLiteStatus EvalTensorFlowSum(TfLiteContext *context, TfLiteNode *node);

} // namespace TensorFlowSum
} // namespace custom
} // namespace tflite
} // namespace nnfw

#endif // __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__
// NOTE To minimize diff with upstream tensorflow, disable clang-format
// clang-format off

// NOTE This header is derived from the following file (in TensorFlow)
//        'externals/tensorflow/tensorflow/contrib/lite/kernels/register.h'
#ifndef __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__
#define __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__

#include <unordered_map>
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/model.h"

namespace nnfw {
namespace tflite {

// Op resolver of this project, derived from ::tflite::MutableOpResolver.
// Only the default constructor is declared here; which ops it registers is decided by its
// definition, which is not part of this header.
class BuiltinOpResolver : public ::tflite::MutableOpResolver {
 public:
  BuiltinOpResolver();
};

}  // namespace tflite
}  // namespace nnfw

#endif // __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__

// clang-format on
// NOTE To minimize diff with upstream tensorflow, disable clang-format
// clang-format off

// NOTE This header is derived from the following file (in TensorFlow v1.12)
//        'externals/tensorflow/tensorflow/contrib/lite/nnapi_delegate.h'
#ifndef __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__
#define __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__

#include "tensorflow/contrib/lite/allocation.h"
#ifdef OBS_BUILD
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/error_reporter.h"
#else
#include "tensorflow/contrib/lite/c/c_api_internal.h"
#include "tensorflow/contrib/lite/core/api/error_reporter.h"
#endif
#include "tensorflow/contrib/lite/interpreter.h"
#include "NeuralNetworksShim.h"

class ANeuralNetworksModel;
class ANeuralNetworksMemory;
class ANeuralNetworksCompilation;

namespace nnfw {
namespace tflite {

// Memory-mapped allocation (derived from ::tflite::MMAPAllocation) that additionally carries an
// ANeuralNetworksMemory handle.
class NNAPIAllocation : public ::tflite::MMAPAllocation {
 public:
  NNAPIAllocation(const char* filename, ::tflite::ErrorReporter* error_reporter);
  ~NNAPIAllocation();

  // Byte distance from the start of the mapped buffer to 'ptr'.
  // No range check is done here; NOTE(review): presumably 'ptr' is expected to point into the
  // mapped buffer - confirm against the callers.
  size_t offset(const void* ptr) const {
    auto signed_offset = reinterpret_cast<const uint8_t*>(ptr) -
                         reinterpret_cast<const uint8_t*>(mmapped_buffer_);

    return static_cast<size_t>(signed_offset);
  }

  // The NN API memory handle (nullptr until one has been set)
  ANeuralNetworksMemory* memory() const { return handle_; }
  // The allocation counts as valid only while the handle is non-null
  bool valid() const override { return handle_ != nullptr; }

 private:
  mutable ANeuralNetworksMemory* handle_ = nullptr;
};

class NNAPIDelegate {
 public:
  ~NNAPIDelegate();

  // Convert a tflite graph to NNAPI
  TfLiteStatus BuildGraph(::tflite::Interpreter* interpreter);

  // Run
  TfLiteStatus Invoke(::tflite::Interpreter* interpreter);

  // Whether the current platform supports NNAPI delegation.
  static bool IsSupported();

 private:
  // The NN API model handle
  ANeuralNetworksModel* nn_model_ = nullptr;
  // The NN API compilation handle
  ANeuralNetworksCompilation* nn_compiled_model_ = nullptr;
  // Model status
  TfLiteStatus model_status_ = kTfLiteOk;

  // List of state tensors for LSTM, RNN, SVDF.
  // NN API does not allow ops to maintain states across multiple
  // invocations. We need to manually create state input tensors from
  // corresponding state output tensors of TFLite operations, and map them
  // correctly.
  std::vector<int> model_states_inputs_;   // holds NNAPI operand ids
  std::vector<int> model_states_outputs_;  // holds TFLite tensor ids
};

} // namespace tflite
} // namespace nnfw

#endif  // __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__

// clang-format on
- */ - -/** - * @file Builder.h - * @brief This file contains Builder structure - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_INTERP_BUILDER_H__ -#define __NNFW_TFLITE_INTERP_BUILDER_H__ - -#include <tensorflow/contrib/lite/interpreter.h> - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Structure to Builder - */ -struct Builder -{ - /** - * @brief Destroy the Builder object - */ - virtual ~Builder() = default; - - /** - * @brief Build a FlatBuffer model - * @return The TfLite interpreter object - */ - virtual std::unique_ptr<::tflite::Interpreter> build(void) const = 0; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_INTERP_BUILDER_H__ diff --git a/libs/tflite/include/tflite/interp/FlatBufferBuilder.h b/libs/tflite/include/tflite/interp/FlatBufferBuilder.h deleted file mode 100644 index 13470b8c5..000000000 --- a/libs/tflite/include/tflite/interp/FlatBufferBuilder.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file FlatBufferBuilder.h - * @brief This file contains FlatBufferBuilder class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__ -#define __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__ - -#include <tensorflow/contrib/lite/model.h> - -#include "tflite/interp/Builder.h" - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Class to define FlatBufferBuilder which is inherited from Builder - */ -class FlatBufferBuilder final : public Builder -{ -public: - /** - * @brief Construct a FlatBufferBuilder object with FlatBufferModel of TfLite - * @param[in] model The TfLite Flatbuffer model - */ - FlatBufferBuilder(const ::tflite::FlatBufferModel &model) : _model{model} - { - // DO NOTHING - } - -public: - /** - * @brief Build a FlatBuffer model - * @return The TfLite interpreter pointer address - */ - std::unique_ptr<::tflite::Interpreter> build(void) const override; - -private: - const ::tflite::FlatBufferModel &_model; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__ diff --git a/libs/tflite/include/tflite/interp/FunctionBuilder.h b/libs/tflite/include/tflite/interp/FunctionBuilder.h deleted file mode 100644 index 064375939..000000000 --- a/libs/tflite/include/tflite/interp/FunctionBuilder.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file FunctionBuilder.h - * @brief This file contains FunctionBuilder class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__ -#define __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__ - -#include <tensorflow/contrib/lite/model.h> - -#include "tflite/interp/Builder.h" - -namespace nnfw -{ -namespace tflite -{ - -/** - * @brief Class to define FunctionBuilder which is inherited from Builder - */ -class FunctionBuilder final : public Builder -{ -public: - using SetupFunc = std::function<void(::tflite::Interpreter &)>; - -public: - /** - * @brief Construct a FunctionBuilder object with SetupFunction - * @param[in] fn The SetupFunc object - */ - FunctionBuilder(const SetupFunc &fn) : _fn{fn} - { - // DO NOTHING - } - -public: - /** - * @brief Build a SetupFunc - * @return The TfLite interpreter pointer address - */ - std::unique_ptr<::tflite::Interpreter> build(void) const override; - -private: - SetupFunc _fn; -}; - -} // namespace tflite -} // namespace nnfw - -#endif // __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__ diff --git a/libs/tflite/src/Diff.cpp b/libs/tflite/src/Diff.cpp deleted file mode 100644 index 45ef06110..000000000 --- a/libs/tflite/src/Diff.cpp +++ /dev/null @@ -1,598 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/Diff.h" -#include "tflite/ext/nnapi_delegate.h" - -#include "misc/fp32.h" - -#include "misc/tensor/IndexIterator.h" -#include "misc/tensor/IndexFormatter.h" -#include "misc/tensor/Zipper.h" -#include "misc/tensor/Comparator.h" - -#include "misc/environment.h" - -#include <iostream> -#include <cassert> - -class DiffSummary : public nnfw::misc::tensor::Comparator::Observer -{ -public: - DiffSummary() - : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f}, - max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f}, - max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f} - { - // DO NOTHING - } - -public: - void notify(const nnfw::misc::tensor::Index &index, float expected, float obtained) override; - -public: - nnfw::misc::tensor::Index max_abs_diff_index; - float max_abs_diff_expected; - float max_abs_diff_obtained; - float max_abs_diff_value; - - nnfw::misc::tensor::Index max_rel_diff_index; - float max_rel_diff_expected; - float max_rel_diff_obtained; - float max_rel_diff_value; -}; - -void DiffSummary::notify(const nnfw::misc::tensor::Index &index, float expected, float obtained) -{ - const auto abs_diff_value = std::fabs(expected - obtained); - - if (max_abs_diff_value < abs_diff_value) - { - max_abs_diff_index = index; - max_abs_diff_value = abs_diff_value; - max_abs_diff_expected = expected; - max_abs_diff_obtained = obtained; - } - - const auto rel_diff_value = nnfw::misc::fp32::relative_diff(expected, obtained); - - if (max_rel_diff_value < rel_diff_value) - { - max_rel_diff_index = index; - max_rel_diff_value = rel_diff_value; - max_rel_diff_expected = expected; - max_rel_diff_obtained = obtained; - } -} - -template <typename T> -bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorView<T> &expected, - const nnfw::tflite::TensorView<T> &obtained, - int id) const -{ - std::vector<nnfw::misc::tensor::Diff<T>> diffs; - assert(expected.shape() == obtained.shape()); 
- - using nnfw::misc::tensor::zip; - using nnfw::misc::tensor::Index; - - zip(expected.shape(), expected, obtained) - << [&](const Index &index, T expected_value, T obtained_value) { - if (expected_value != obtained_value) - { - diffs.emplace_back(index, expected_value, obtained_value); - } - }; - - // TODO Unify summary generation code - if (diffs.size() == 0) - { - std::cout << " Tensor #" << id << ": MATCHED" << std::endl; - } - else - { - std::cout << " Tensor #" << id << ": UNMATCHED" << std::endl; - std::cout << " " << diffs.size() << " diffs are detected" << std::endl; - } - - if (diffs.size() > 0 && _verbose != 0) - { - std::cout << " ---- Details ---" << std::endl; - for (const auto &diff : diffs) - { - std::cout << " Diff at [" << nnfw::misc::tensor::IndexFormatter(diff.index) << "]" - << std::endl; - std::cout << " expected: " << diff.expected << std::endl; - std::cout << " obtained: " << diff.obtained << std::endl; - } - } - - return diffs.size() == 0; -} - -template <> -bool TfLiteInterpMatchApp::compareSingleTensorView<float>( - const nnfw::tflite::TensorView<float> &expected, - const nnfw::tflite::TensorView<float> &obtained, int id) const -{ - DiffSummary summary; - - assert(expected.shape() == obtained.shape()); - auto diffs = _comparator.compare(expected.shape(), expected, obtained, &summary); - - // TODO Unify summary generation code - if (diffs.size() == 0) - { - std::cout << " Tensor #" << id << ": MATCHED" << std::endl; - } - else - { - std::cout << " Tensor #" << id << ": UNMATCHED" << std::endl; - std::cout << " " << diffs.size() << " diffs are detected" << std::endl; - } - - // Print out max_diff - if (summary.max_abs_diff_value > 0) - { - std::cout << " Max absolute diff at [" - << nnfw::misc::tensor::IndexFormatter(summary.max_abs_diff_index) << "]" << std::endl; - std::cout << " expected: " << summary.max_abs_diff_expected << std::endl; - std::cout << " obtained: " << summary.max_abs_diff_obtained << std::endl; - std::cout << " absolute 
diff: " << summary.max_abs_diff_value << std::endl; - } - - if (summary.max_rel_diff_value > 0) - { - const auto tolerance_level = summary.max_rel_diff_value / FLT_EPSILON; - - std::cout << " Max relative diff at [" - << nnfw::misc::tensor::IndexFormatter(summary.max_rel_diff_index) << "]" << std::endl; - std::cout << " expected: " << summary.max_rel_diff_expected << std::endl; - std::cout << " obtained: " << summary.max_rel_diff_obtained << std::endl; - std::cout << " relative diff: " << summary.max_rel_diff_value << std::endl; - std::cout << " (tolerance level = " << tolerance_level << ")" << std::endl; - } - - if (diffs.size() > 0) - { - if (_verbose != 0) - { - std::cout << " ---- Details ---" << std::endl; - for (const auto &diff : diffs) - { - const auto absolute_diff = std::fabs(diff.expected - diff.obtained); - const auto relative_diff = nnfw::misc::fp32::relative_diff(diff.expected, diff.obtained); - const auto tolerance_level = relative_diff / FLT_EPSILON; - - std::cout << " Diff at [" << nnfw::misc::tensor::IndexFormatter(diff.index) << "]" - << std::endl; - std::cout << " expected: " << diff.expected << std::endl; - std::cout << " obtained: " << diff.obtained << std::endl; - std::cout << " absolute diff: " << absolute_diff << std::endl; - std::cout << " relative diff: " << relative_diff << std::endl; - std::cout << " (tolerance level = " << tolerance_level << ")" << std::endl; - } - } - - return false; - } - return true; -} - -#include <map> - -bool TfLiteInterpMatchApp::run(::tflite::Interpreter &interp, ::tflite::Interpreter &nnapi) const -{ - assert(interp.outputs() == nnapi.outputs()); - - bool all_matched = true; - - using Comparator = std::function<bool(int id, ::tflite::Interpreter &, ::tflite::Interpreter &)>; - - std::map<TfLiteType, Comparator> comparators; - - comparators[kTfLiteUInt8] = [this](int id, ::tflite::Interpreter &interp, - ::tflite::Interpreter &nnapi) { - const auto expected = nnfw::tflite::TensorView<uint8_t>::make(interp, id); 
- const auto obtained = nnfw::tflite::TensorView<uint8_t>::make(nnapi, id); - - return compareSingleTensorView(expected, obtained, id); - }; - - comparators[kTfLiteInt32] = [this](int id, ::tflite::Interpreter &interp, - ::tflite::Interpreter &nnapi) { - const auto expected = nnfw::tflite::TensorView<int32_t>::make(interp, id); - const auto obtained = nnfw::tflite::TensorView<int32_t>::make(nnapi, id); - - return compareSingleTensorView(expected, obtained, id); - }; - - comparators[kTfLiteFloat32] = [this](int id, ::tflite::Interpreter &interp, - ::tflite::Interpreter &nnapi) { - const auto expected = nnfw::tflite::TensorView<float>::make(interp, id); - const auto obtained = nnfw::tflite::TensorView<float>::make(nnapi, id); - - return compareSingleTensorView(expected, obtained, id); - }; - - comparators[kTfLiteBool] = [this](int id, ::tflite::Interpreter &interp, - ::tflite::Interpreter &nnapi) { - const auto expected = nnfw::tflite::TensorView<bool>::make(interp, id); - const auto obtained = nnfw::tflite::TensorView<bool>::make(nnapi, id); - - return compareSingleTensorView(expected, obtained, id); - }; - - for (const auto &id : interp.outputs()) - { - assert(interp.tensor(id)->type == nnapi.tensor(id)->type); - - auto it = comparators.find(interp.tensor(id)->type); - - if (it == comparators.end()) - { - throw std::runtime_error{"Not supported output type"}; - } - - const auto &comparator = it->second; - - if (!comparator(id, interp, nnapi)) - { - all_matched = false; - } - } - - return all_matched; -} - -#include "misc/tensor/Object.h" - -using namespace std::placeholders; - -template <> uint8_t RandomGenerator::generate<uint8_t>(void) -{ - // The value of type_range is 255. - float type_range = static_cast<float>(std::numeric_limits<uint8_t>::max()) - - static_cast<float>(std::numeric_limits<uint8_t>::min()); - // Most _dist values range from -5.0 to 5.0. 
- float min_range = -5.0f; - float max_range = 5.0f; - return static_cast<uint8_t>((_dist(_rand) - min_range) * type_range / (max_range - min_range)); -} - -#include "tflite/TensorLogger.h" -// -// Random Test Runner -// -int RandomTestRunner::run(const nnfw::tflite::Builder &builder) -{ - auto tfl_interp = builder.build(); - auto nnapi = builder.build(); - - tfl_interp->UseNNAPI(false); - - // Allocate Tensors - tfl_interp->AllocateTensors(); - nnapi->AllocateTensors(); - - assert(tfl_interp->inputs() == nnapi->inputs()); - - using ::tflite::Interpreter; - using Initializer = std::function<void(int id, Interpreter *, Interpreter *)>; - - std::map<TfLiteType, Initializer> initializers; - std::map<TfLiteType, Initializer> reseters; - - // Generate singed 32-bit integer (s32) input - initializers[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { - assert(tfl_interp->tensor(id)->type == kTfLiteInt32); - assert(nnapi->tensor(id)->type == kTfLiteInt32); - - auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id); - auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id); - - assert(tfl_interp_view.shape() == nnapi_view.shape()); - - int32_t value = 0; - - nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - ++value; - }; - }; - - // Generate singed 32-bit integer (s32) input - reseters[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { - assert(tfl_interp->tensor(id)->type == kTfLiteInt32); - assert(nnapi->tensor(id)->type == kTfLiteInt32); - - auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id); - auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id); - - assert(tfl_interp_view.shape() == nnapi_view.shape()); - - int32_t value = 0; - - nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << 
[&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; - }; - - initializers[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { - assert(tfl_interp->tensor(id)->type == kTfLiteUInt8); - assert(nnapi->tensor(id)->type == kTfLiteUInt8); - - auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id); - auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id); - - assert(tfl_interp_view.shape() == nnapi_view.shape()); - - auto fp = static_cast<uint8_t (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &, - const ::nnfw::misc::tensor::Index &)>( - &RandomGenerator::generate<uint8_t>); - const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(), - std::bind(fp, _randgen, _1, _2)); - assert(tfl_interp_view.shape() == data.shape()); - - nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; - }; - - reseters[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { - assert(tfl_interp->tensor(id)->type == kTfLiteUInt8); - assert(nnapi->tensor(id)->type == kTfLiteUInt8); - - auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id); - auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id); - - assert(tfl_interp_view.shape() == nnapi_view.shape()); - - auto fp = static_cast<uint8_t (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &, - const ::nnfw::misc::tensor::Index &)>( - &RandomGenerator::generate<uint8_t>); - const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(), - std::bind(fp, _randgen, _1, _2)); - assert(tfl_interp_view.shape() == data.shape()); - - uint8_t value = 0; - - nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - 
tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; - }; - - initializers[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { - assert(tfl_interp->tensor(id)->type == kTfLiteFloat32); - assert(nnapi->tensor(id)->type == kTfLiteFloat32); - - auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id); - auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id); - - assert(tfl_interp_view.shape() == nnapi_view.shape()); - - auto fp = static_cast<float (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &, - const ::nnfw::misc::tensor::Index &)>( - &RandomGenerator::generate<float>); - const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(), - std::bind(fp, _randgen, _1, _2)); - - assert(tfl_interp_view.shape() == data.shape()); - - nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; - }; - - reseters[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { - assert(tfl_interp->tensor(id)->type == kTfLiteFloat32); - assert(nnapi->tensor(id)->type == kTfLiteFloat32); - - auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id); - auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id); - - assert(tfl_interp_view.shape() == nnapi_view.shape()); - - auto fp = static_cast<float (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &, - const ::nnfw::misc::tensor::Index &)>( - &RandomGenerator::generate<float>); - const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(), - std::bind(fp, _randgen, _1, _2)); - - assert(tfl_interp_view.shape() == data.shape()); - - float value = 0; - - nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; - }; - - 
initializers[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { - assert(tfl_interp->tensor(id)->type == kTfLiteBool); - assert(nnapi->tensor(id)->type == kTfLiteBool); - - auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id); - auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id); - - assert(tfl_interp_view.shape() == nnapi_view.shape()); - - auto fp = static_cast<bool (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &, - const ::nnfw::misc::tensor::Index &)>( - &RandomGenerator::generate<bool>); - const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(), - std::bind(fp, _randgen, _1, _2)); - - assert(tfl_interp_view.shape() == data.shape()); - - nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; - }; - - reseters[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { - assert(tfl_interp->tensor(id)->type == kTfLiteBool); - assert(nnapi->tensor(id)->type == kTfLiteBool); - - auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id); - auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id); - - assert(tfl_interp_view.shape() == nnapi_view.shape()); - - auto fp = static_cast<bool (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &, - const ::nnfw::misc::tensor::Index &)>( - &RandomGenerator::generate<bool>); - const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(), - std::bind(fp, _randgen, _1, _2)); - - assert(tfl_interp_view.shape() == data.shape()); - - bool value = false; - - nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; - }; - - // Fill IFM with random numbers - for (const auto id : tfl_interp->inputs()) - { - 
assert(tfl_interp->tensor(id)->type == nnapi->tensor(id)->type); - - auto it = initializers.find(tfl_interp->tensor(id)->type); - - if (it == initializers.end()) - { - throw std::runtime_error{"Not supported input type"}; - } - - it->second(id, tfl_interp.get(), nnapi.get()); - } - - // Fill OFM with 0 - for (const auto id : tfl_interp->outputs()) - { - assert(tfl_interp->tensor(id)->type == nnapi->tensor(id)->type); - - auto it = reseters.find(tfl_interp->tensor(id)->type); - - if (it == reseters.end()) - { - throw std::runtime_error{"Not supported input type"}; - } - - it->second(id, tfl_interp.get(), nnapi.get()); - } - - std::cout << "[NNAPI TEST] Run T/F Lite Interpreter without NNAPI" << std::endl; - tfl_interp->Invoke(); - - std::cout << "[NNAPI TEST] Run T/F Lite Interpreter with NNAPI" << std::endl; - - char *env = getenv("UPSTREAM_DELEGATE"); - - if (env && !std::string(env).compare("1")) - { - nnapi->UseNNAPI(true); - nnapi->Invoke(); - } - else - { - nnfw::tflite::NNAPIDelegate d; - - if (d.BuildGraph(nnapi.get())) - { - throw std::runtime_error{"Failed to BuildGraph"}; - } - - if (d.Invoke(nnapi.get())) - { - throw std::runtime_error{"Failed to BuildGraph"}; - } - } - - // Compare OFM - std::cout << "[NNAPI TEST] Compare the result" << std::endl; - - const auto tolerance = _param.tolerance; - - auto equals = [tolerance](float lhs, float rhs) { - // NOTE Hybrid approach - // TODO Allow users to set tolerance for absolute_epsilon_equal - if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs)) - { - return true; - } - - return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance); - }; - - nnfw::misc::tensor::Comparator comparator(equals); - TfLiteInterpMatchApp app(comparator); - - app.verbose() = _param.verbose; - - bool res = app.run(*tfl_interp, *nnapi); - - if (!res) - { - return 255; - } - - std::cout << "[NNAPI TEST] PASSED" << std::endl; - - if (_param.tensor_logging) - nnfw::tflite::TensorLogger::instance().save(_param.log_path, *tfl_interp); - - 
return 0; -} - -RandomTestRunner RandomTestRunner::make(int seed) -{ - RandomTestParam param; - - param.verbose = 0; - param.tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(param.verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(param.tolerance); - - return RandomTestRunner{seed, param}; -} diff --git a/libs/tflite/src/FeatureView.cpp b/libs/tflite/src/FeatureView.cpp deleted file mode 100644 index fdf5a4b00..000000000 --- a/libs/tflite/src/FeatureView.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/FeatureView.h" -#include "tflite/TensorUtils.h" - -#include <cassert> - -namespace nnfw -{ -namespace tflite -{ - -nnfw::misc::feature::Shape getFeatureShape(const TfLiteTensor *tensor) -{ - nnfw::misc::feature::Shape shape{tensor->dims->data[3], tensor->dims->data[1], - tensor->dims->data[2]}; - - return shape; -} - -FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const InputIndex &index) -{ - const auto tensor_index = interp.inputs().at(index.asInt()); - auto tensor_ptr = interp.tensor(tensor_index); - - assert(isFloatTensor(tensor_ptr)); - assert(isFeatureTensor(tensor_ptr)); - - _shape = getFeatureShape(tensor_ptr); - _base = interp.typed_tensor<float>(tensor_index); -} - -FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const OutputIndex &index) -{ - const auto tensor_index = interp.outputs().at(index.asInt()); - auto tensor_ptr = interp.tensor(tensor_index); - - assert(isFloatTensor(tensor_ptr)); - assert(isFeatureTensor(tensor_ptr)); - - _shape = getFeatureShape(tensor_ptr); - _base = interp.typed_tensor<float>(tensor_index); -} - -float FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col) const -{ - return *(_base + getElementOffset(ch, row, col)); -} - -float &FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col) -{ - return *(_base + getElementOffset(ch, row, col)); -} - -} // namespace tflite -} // namespace nnfw diff --git a/libs/tflite/src/Quantization.cpp b/libs/tflite/src/Quantization.cpp deleted file mode 100644 index 9c162c342..000000000 --- a/libs/tflite/src/Quantization.cpp +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "tflite/Quantization.h" - -TfLiteQuantizationParams make_default_quantization(void) -{ - return TfLiteQuantizationParams{0.0f, 0}; -} diff --git a/libs/tflite/src/TensorShapeUtils.cpp b/libs/tflite/src/TensorShapeUtils.cpp deleted file mode 100644 index b5d906719..000000000 --- a/libs/tflite/src/TensorShapeUtils.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "tflite/TensorShapeUtils.h" - -namespace nnfw -{ -namespace tflite -{ - -nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape, - const nnfw::misc::tensor::Shape &rhs_shape) -{ - const uint32_t lhs_rank = lhs_shape.rank(); - const uint32_t rhs_rank = rhs_shape.rank(); - const uint32_t out_rank = std::max(lhs_rank, rhs_rank); - - // TODO Simplify implementation - std::vector<int32_t> lhs_normalized_dims; - std::vector<int32_t> rhs_normalized_dims; - - for (uint32_t n = 0; n < out_rank - lhs_rank; ++n) - { - lhs_normalized_dims.emplace_back(1); - } - for (uint32_t axis = 0; axis < lhs_rank; ++axis) - { - lhs_normalized_dims.emplace_back(lhs_shape.dim(axis)); - } - - for (uint32_t n = 0; n < out_rank - rhs_rank; ++n) - { - rhs_normalized_dims.emplace_back(1); - } - for (uint32_t axis = 0; axis < rhs_rank; ++axis) - { - rhs_normalized_dims.emplace_back(rhs_shape.dim(axis)); - } - - nnfw::misc::tensor::Shape out_shape(out_rank); - - for (uint32_t axis = 0; axis < out_rank; ++axis) - { - out_shape.dim(axis) = std::max(lhs_normalized_dims.at(axis), rhs_normalized_dims.at(axis)); - } - - return out_shape; -} - -} // namespace tflite -} // namespace nnfw diff 
--git a/libs/tflite/src/TensorView.test.cpp b/libs/tflite/src/TensorView.test.cpp deleted file mode 100644 index c710b3c33..000000000 --- a/libs/tflite/src/TensorView.test.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "tflite/TensorView.h" - -#include <cassert> - -void int_test(void) -{ - int value[6] = {1, 2, 3, 4, 5, 6}; - - const nnfw::misc::tensor::Shape shape{2, 3}; - const nnfw::tflite::TensorView<int> view{shape, value}; - - assert(view.at(nnfw::misc::tensor::Index{0, 0}) == 1); - assert(view.at(nnfw::misc::tensor::Index{0, 1}) == 2); - assert(view.at(nnfw::misc::tensor::Index{0, 2}) == 3); - assert(view.at(nnfw::misc::tensor::Index{1, 0}) == 4); - assert(view.at(nnfw::misc::tensor::Index{1, 1}) == 5); - assert(view.at(nnfw::misc::tensor::Index{1, 2}) == 6); -} - -int main(int argc, char **argv) -{ - float value[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - - const nnfw::misc::tensor::Shape shape{2, 3}; - const nnfw::tflite::TensorView<float> view{shape, value}; - - assert(view.at(nnfw::misc::tensor::Index{0, 0}) == 1.0f); - assert(view.at(nnfw::misc::tensor::Index{0, 1}) == 2.0f); - assert(view.at(nnfw::misc::tensor::Index{0, 2}) == 3.0f); - assert(view.at(nnfw::misc::tensor::Index{1, 0}) == 4.0f); - assert(view.at(nnfw::misc::tensor::Index{1, 1}) == 5.0f); - assert(view.at(nnfw::misc::tensor::Index{1, 2}) == 6.0f); - - 
int_test(); - - return 0; -} diff --git a/libs/tflite/src/ext/kernels/Abs.cpp b/libs/tflite/src/ext/kernels/Abs.cpp deleted file mode 100644 index 7e9c2338d..000000000 --- a/libs/tflite/src/ext/kernels/Abs.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "tflite/ext/kernels/Abs.h" -#include "tensorflow/contrib/lite/kernels/kernel_util.h" - -#include <iostream> -#include <cmath> - -namespace nnfw -{ -namespace tflite -{ -namespace custom -{ -namespace Abs -{ - -void *InitAbs(TfLiteContext *context, const char *buffer, size_t length) { return nullptr; } - -void FreeAbs(TfLiteContext *context, void *buffer) {} - -TfLiteStatus PrepareAbs(TfLiteContext *context, TfLiteNode *node) -{ - TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1); - - const TfLiteTensor *input = ::tflite::GetInput(context, node, 0); - TfLiteTensor *output = ::tflite::GetOutput(context, node, 0); - - TF_LITE_ENSURE_EQ(context, input->type, output->type); - - return context->ResizeTensor(context, output, TfLiteIntArrayCopy(input->dims)); -} - -TfLiteStatus EvalAbs(TfLiteContext *context, TfLiteNode *node) -{ - const TfLiteTensor *input = ::tflite::GetInput(context, node, 0); - TfLiteTensor *output = ::tflite::GetOutput(context, node, 0); - size_t elements = ::tflite::NumElements(input); - switch 
(input->type) - { - case kTfLiteFloat32: - { - auto *in = input->data.f; - auto *in_end = in + elements; - auto *out = output->data.f; - for (; in < in_end; in++, out++) - *out = std::abs(*in); - return kTfLiteOk; - } - case kTfLiteInt32: - { - auto *in = input->data.i32; - auto *in_end = in + elements; - auto *out = output->data.i32; - for (; in < in_end; in++, out++) - *out = std::abs(*in); - return kTfLiteOk; - } - case kTfLiteInt64: - { - auto *in = input->data.i64; - auto *in_end = in + elements; - auto *out = output->data.i64; - for (; in < in_end; in++, out++) - *out = std::abs(*in); - return kTfLiteOk; - } - case kTfLiteUInt8: - { - auto *in = input->data.uint8; - auto *in_end = in + elements; - auto *out = output->data.uint8; - for (; in < in_end; in++, out++) - *out = std::abs(*in); - return kTfLiteOk; - } - default: - { - context->ReportError(context, "Input type %d is not supported", input->type); - return kTfLiteError; - } - } -} - -} // namespace Abs -} // namespace custom -} // namespace tflite -} // namespace nnfw diff --git a/libs/tflite/src/ext/kernels/SquaredDifference.cpp b/libs/tflite/src/ext/kernels/SquaredDifference.cpp deleted file mode 100644 index 8ac2b1de0..000000000 --- a/libs/tflite/src/ext/kernels/SquaredDifference.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/ext/kernels/SquaredDifference.h" -#include "tensorflow/contrib/lite/kernels/kernel_util.h" - -#include <iostream> - -namespace nnfw -{ -namespace tflite -{ -namespace custom -{ -namespace SquaredDifference -{ - -void *InitSquaredDifference(TfLiteContext *context, const char *buffer, size_t length) -{ - return nullptr; -} - -void FreeSquaredDifference(TfLiteContext *context, void *buffer) {} - -TfLiteStatus PrepareSquaredDifference(TfLiteContext *context, TfLiteNode *node) -{ - TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2); - TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1); - - const TfLiteTensor *input1 = ::tflite::GetInput(context, node, 0); - const TfLiteTensor *input2 = ::tflite::GetInput(context, node, 1); - TfLiteTensor *output = ::tflite::GetOutput(context, node, 0); - - TF_LITE_ENSURE_EQ(context, input1->type, input2->type); - TF_LITE_ENSURE_EQ(context, input1->type, output->type); - - return context->ResizeTensor(context, output, TfLiteIntArrayCopy(input1->dims)); -} - -TfLiteStatus EvalSquaredDifference(TfLiteContext *context, TfLiteNode *node) -{ - - const TfLiteTensor *input1 = ::tflite::GetInput(context, node, 0); - const TfLiteTensor *input2 = ::tflite::GetInput(context, node, 1); - - TfLiteTensor *output = ::tflite::GetOutput(context, node, 0); - - size_t elements = ::tflite::NumElements(input1); - - switch (input1->type) - { - case kTfLiteFloat32: - { - const float *in1 = input1->data.f; - const float *in2 = input2->data.f; - const float *in_end1 = in1 + elements; - float *out = output->data.f; - - for (; in1 < in_end1; in1++, in2++, out++) - *out = ((*in1 - *in2) * (*in1 - *in2)); - - return kTfLiteOk; - } - case kTfLiteInt32: - { - const int *in1 = input1->data.i32; - const int *in2 = input2->data.i32; - const int *in_end1 = in1 + elements; - int *out = output->data.i32; - - for (; in1 < in_end1; in1++, in2++, out++) - *out = ((*in1 - *in2) * (*in1 - *in2)); - - return kTfLiteOk; - } - case 
kTfLiteInt64: - { - const int64_t *in1 = input1->data.i64; - const int64_t *in2 = input1->data.i64; - const int64_t *in_end1 = in1 + elements; - int64_t *out = output->data.i64; - - for (; in1 < in_end1; in1++, in2++, out++) - *out = ((*in1 - *in2) * (*in1 - *in2)); - - return kTfLiteOk; - } - default: - { - context->ReportError(context, "InputType is %d Unsupported", input1->type); - return kTfLiteError; - } - } -} - -} // namespace SquaredDifference -} // namespace custom -} // namespace tflite -} // namespace nnfw diff --git a/libs/tflite/src/ext/kernels/TensorFlowMax.cpp b/libs/tflite/src/ext/kernels/TensorFlowMax.cpp deleted file mode 100644 index d72ad242c..000000000 --- a/libs/tflite/src/ext/kernels/TensorFlowMax.cpp +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/ext/kernels/TensorFlowMax.h" -#include "tensorflow/contrib/lite/kernels/kernel_util.h" - -#include <iostream> - -namespace nnfw -{ -namespace tflite -{ -namespace custom -{ -namespace TensorFlowMax -{ - -struct TensorFlowMaxOp -{ - TensorFlowMaxOp(TfLiteContext *context, TfLiteNode *node) - { - input = ::tflite::GetInput(context, node, 0); - axis = ::tflite::GetInput(context, node, 1); - output = ::tflite::GetOutput(context, node, 0); - } - const TfLiteTensor *input; - const TfLiteTensor *axis; - TfLiteTensor *output; -}; - -void *InitTensorFlowMax(TfLiteContext *context, const char *buffer, size_t length) -{ - // Creates two temp tensors to store index and axis for internal - // implementation only. - auto *scratch_tensor_index = new int; - context->AddTensors(context, 2, scratch_tensor_index); - return scratch_tensor_index; -} - -void FreeTensorFlowMax(TfLiteContext *context, void *buffer) -{ - delete static_cast<TensorFlowMaxOp *>(buffer); -} - -// Resizes the temp tensor that stores resolved axis. -TfLiteStatus ResizeTempAxis(TfLiteContext *context, TensorFlowMaxOp *op_context, - TfLiteTensor *resolved_axis) -{ - TfLiteIntArray *axis_size = TfLiteIntArrayCreate(1); - axis_size->data[0] = static_cast<int>(::tflite::NumElements(op_context->axis)); - return context->ResizeTensor(context, resolved_axis, axis_size); -} - -// Resizes output array based on the input size and resolved axis. -TfLiteStatus ResizeOutputTensor(TfLiteContext *context, TensorFlowMaxOp *op_context) -{ - size_t num_axis = ::tflite::NumElements(op_context->axis); - TfLiteIntArray *input_dims = op_context->input->dims; - int input_num_dims = ::tflite::NumDimensions(op_context->input); - const int *axis = op_context->axis->data.i32; - - { - // Calculates size of reducing axis. 
- int num_reduce_axis = num_axis; - for (int i = 0; i < num_axis; ++i) - { - int current = axis[i]; - if (current < 0) - { - current += input_num_dims; - } - TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims); - for (int j = 0; j < i; ++j) - { - int previous = axis[j]; - if (previous < 0) - { - previous += input_num_dims; - } - if (current == previous) - { - --num_reduce_axis; - break; - } - } - } - // Determines output dimensions. - int output_num_dims = ::tflite::NumDimensions(op_context->output); - TF_LITE_ENSURE(context, (input_num_dims == output_num_dims) || - (input_num_dims - num_reduce_axis == output_num_dims)); - - if (input_num_dims == output_num_dims) - { - TfLiteIntArray *output_dims = TfLiteIntArrayCopy(input_dims); - for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) - { - int current = axis[axis_idx]; - output_dims->data[current] = 1; - } - return context->ResizeTensor(context, op_context->output, output_dims); - } - else - { - TfLiteIntArray *output_dims = TfLiteIntArrayCreate(output_num_dims); - int num_skip_axis = 0; - for (int idx = 0; idx < input_num_dims; ++idx) - { - bool is_axis = false; - for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) - { - if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx) - { - ++num_skip_axis; - is_axis = true; - break; - } - } - if (!is_axis) - { - output_dims->data[idx - num_skip_axis] = input_dims->data[idx]; - } - } - return context->ResizeTensor(context, op_context->output, output_dims); - } - } -} - -// Initializes temp tensors to store index and resolved axis. -TfLiteStatus InitializeTemporaries(TfLiteContext *context, TfLiteNode *node, - TensorFlowMaxOp *op_context) -{ - // Creates a temp index to iterate through input data. 
- int *scratch_tensor_index = reinterpret_cast<int *>(node->user_data); - TfLiteIntArrayFree(node->temporaries); - node->temporaries = TfLiteIntArrayCreate(2); - node->temporaries->data[0] = *scratch_tensor_index; - TfLiteTensor *scratch_tensor = &context->tensors[node->temporaries->data[0]]; - scratch_tensor->type = kTfLiteInt32; - scratch_tensor->allocation_type = kTfLiteArenaRw; - TfLiteIntArray *index_size = TfLiteIntArrayCreate(1); - index_size->data[0] = ::tflite::NumDimensions(op_context->input); - TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_tensor, index_size)); - - // Creates a temp tensor to store resolved axis given input data. - node->temporaries->data[1] = *scratch_tensor_index + 1; - TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]]; - resolved_axis->type = kTfLiteInt32; - return kTfLiteOk; -} - -TfLiteStatus PrepareTensorFlowMax(TfLiteContext *context, TfLiteNode *node) -{ - TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2); - TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1); - - TensorFlowMaxOp op_context(context, node); - TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context)); - - TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]]; - // Leaves work to Eval if axis is not constant; else resizes output. - if (!::tflite::IsConstantTensor(op_context.axis)) - { - ::tflite::SetTensorToDynamic(op_context.output); - ::tflite::SetTensorToDynamic(resolved_axis); - return kTfLiteOk; - } - resolved_axis->allocation_type = kTfLiteArenaRw; - TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis)); - return ResizeOutputTensor(context, &op_context); -} - -// Gets offset of index if expanded on axis. When expanded, the flattened offset -// will not change, if the output index changes on the given axis. 
// Flattened offset into the full tensor described by `dims` for an index that only
// carries coordinates for the dimensions NOT listed in `axis`. Dimensions listed in
// `axis` contribute coordinate 0, so e.g. expanding a 2D index to 3D on axis 0 always
// lands on the first slice along that axis. A null `axis` means no dimension is listed.
inline size_t ExpandedInputOffset(const int num_dims, const int *dims, const int *index,
                                  const int num_axis, const int *axis)
{
  size_t flat = 0;
  int next_coord = 0; // position of the next unread coordinate in `index`
  for (int dim = 0; dim < num_dims; ++dim)
  {
    bool expanded = false;
    for (int a = 0; axis != nullptr && a < num_axis && !expanded; ++a)
    {
      expanded = (axis[a] == dim);
    }

    flat *= static_cast<size_t>(dims[dim]);
    if (!expanded)
    {
      flat += static_cast<size_t>(index[next_coord++]);
    }
  }
  return flat;
}

// Flattened offset of a full-rank `index` after the dimensions listed in `axis` have
// been removed: listed dimensions are skipped entirely, so indices that differ only on
// a listed axis map to the same offset. With a null `axis` this is the plain row-major
// offset of `index` within `dims`.
// TODO(kanlig): uses Dims to represent dimensions.
inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
                                  const int num_axis, const int *axis)
{
  size_t flat = 0;
  for (int dim = 0; dim < num_dims; ++dim)
  {
    bool skipped = false;
    for (int a = 0; axis != nullptr && a < num_axis && !skipped; ++a)
    {
      skipped = (axis[a] == dim);
    }
    if (skipped)
    {
      continue;
    }
    flat = flat * static_cast<size_t>(dims[dim]) + static_cast<size_t>(index[dim]);
  }
  return flat;
}
-inline bool NextIndex(TfLiteContext *context, const int num_dims, const int *dims, int *current) -{ - int carry = 1; - for (int idx = num_dims - 1; idx >= 0; --idx) - { - int current_val = current[idx] + carry; - TF_LITE_ENSURE(context, (dims[idx] >= current_val)); - if (dims[idx] == current_val) - { - current[idx] = 0; - } - else - { - current[idx] = current_val; - carry = 0; - break; - } - } - return (carry == 0); -} - -template <typename T> -inline TfLiteStatus -CustomMax(TfLiteContext *context, T *input_data, const int *input_dims, const int input_num_dims, - T *output_data, const int *output_dims, const int output_num_dims, const int *axis, - const int num_axis_dimensions, bool keep_dims, int *temp_index, int *resolved_axis) -{ - // resolves axis. - int num_resolved_axis = 0; - for (int idx = 0; idx < num_axis_dimensions; ++idx) - { - int current = axis[idx]; - TF_LITE_ENSURE(context, (current < input_num_dims && current + input_num_dims >= 0)); - if (current < 0) - { - current += input_num_dims; - } - bool is_dup = false; - for (int j = 0; j < num_resolved_axis; ++j) - { - if (resolved_axis[j] == current) - { - is_dup = true; - break; - } - } - if (!is_dup) - { - resolved_axis[num_resolved_axis++] = current; - } - } - - TF_LITE_ENSURE(context, (input_num_dims > 0)); - TF_LITE_ENSURE(context, (input_dims != nullptr)); - TF_LITE_ENSURE(context, (temp_index != nullptr)); - - // resets output data. - for (int idx = 0; idx < output_num_dims; ++idx) - { - temp_index[idx] = 0; - } - for (bool has_next = true; has_next; - has_next = NextIndex(context, output_num_dims, output_dims, temp_index)) - { - size_t output_offset = - ReducedOutputOffset(output_num_dims, output_dims, temp_index, 0, nullptr); - size_t input_offset = ExpandedInputOffset(input_num_dims, input_dims, temp_index, - num_resolved_axis, resolved_axis); - output_data[output_offset] = input_data[input_offset]; - } - - // resets temp index. 
- for (int idx = 0; idx < input_num_dims; ++idx) - { - temp_index[idx] = 0; - } - - // iterates through input_data. - for (bool has_next = true; has_next; - has_next = NextIndex(context, input_num_dims, input_dims, temp_index)) - { - size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr); - size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, - num_resolved_axis, resolved_axis); - if (output_data[output_offset] < input_data[input_offset]) - { - output_data[output_offset] = input_data[input_offset]; - } - } - - return kTfLiteOk; -} - -TfLiteStatus EvalTensorFlowMax(TfLiteContext *context, TfLiteNode *node) -{ - - TensorFlowMaxOp op_context(context, node); - int num_axis = static_cast<int>(::tflite::NumElements(op_context.axis)); - TfLiteTensor *temp_index = &context->tensors[node->temporaries->data[0]]; - TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]]; - // Resize the output tensor if the output tensor is dynamic. 
- if (::tflite::IsDynamicTensor(op_context.output)) - { - TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis)); - TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); - } - - TfLiteStatus returnStatus = kTfLiteOk; - switch (op_context.input->type) - { - case kTfLiteFloat32: - returnStatus = CustomMax<float>( - context, op_context.input->data.f, op_context.input->dims->data, - op_context.input->dims->size, op_context.output->data.f, op_context.output->dims->data, - op_context.output->dims->size, op_context.axis->data.i32, num_axis, false, - temp_index->data.i32, resolved_axis->data.i32); - break; - case kTfLiteInt32: - returnStatus = CustomMax<int>(context, op_context.input->data.i32, - op_context.input->dims->data, op_context.input->dims->size, - op_context.output->data.i32, op_context.output->dims->data, - op_context.output->dims->size, op_context.axis->data.i32, - num_axis, false, temp_index->data.i32, resolved_axis->data.i32); - break; - case kTfLiteUInt8: - returnStatus = CustomMax<uint8_t>( - context, op_context.input->data.uint8, op_context.input->dims->data, - op_context.input->dims->size, op_context.output->data.uint8, - op_context.output->dims->data, op_context.output->dims->size, op_context.axis->data.i32, - num_axis, false, temp_index->data.i32, resolved_axis->data.i32); - break; - case kTfLiteInt64: - returnStatus = CustomMax<int64_t>( - context, op_context.input->data.i64, op_context.input->dims->data, - op_context.input->dims->size, op_context.output->data.i64, op_context.output->dims->data, - op_context.output->dims->size, op_context.axis->data.i32, num_axis, false, - temp_index->data.i32, resolved_axis->data.i32); - break; - default: - returnStatus = kTfLiteError; - } - - return returnStatus; -} - -} // namespace TensorFlowMax -} // namespace custom -} // namespace tflite -} // namespace nnfw diff --git a/libs/tflite/src/ext/kernels/TensorFlowSum.cpp b/libs/tflite/src/ext/kernels/TensorFlowSum.cpp deleted 
file mode 100644 index cbf97970c..000000000 --- a/libs/tflite/src/ext/kernels/TensorFlowSum.cpp +++ /dev/null @@ -1,400 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "tflite/ext/kernels/TensorFlowSum.h" -#include "tensorflow/contrib/lite/kernels/kernel_util.h" - -#include <iostream> - -namespace nnfw -{ -namespace tflite -{ -namespace custom -{ -namespace TensorFlowSum -{ - -struct TensorFlowSumOp -{ - TensorFlowSumOp(TfLiteContext *context, TfLiteNode *node) - { - input = ::tflite::GetInput(context, node, 0); - axis = ::tflite::GetInput(context, node, 1); - output = ::tflite::GetOutput(context, node, 0); - } - const TfLiteTensor *input; - const TfLiteTensor *axis; - TfLiteTensor *output; -}; - -void *InitTensorFlowSum(TfLiteContext *context, const char *buffer, size_t length) -{ - // Creates two temp tensors to store index and axis for internal - // implementation only. - auto *scratch_tensor_index = new int; - context->AddTensors(context, 2, scratch_tensor_index); - return scratch_tensor_index; -} - -void FreeTensorFlowSum(TfLiteContext *context, void *buffer) -{ - delete static_cast<TensorFlowSumOp *>(buffer); -} - -// Resizes the temp tensor that stores resolved axis. 
-TfLiteStatus ResizeTempAxis(TfLiteContext *context, TensorFlowSumOp *op_context, - TfLiteTensor *resolved_axis) -{ - TfLiteIntArray *axis_size = TfLiteIntArrayCreate(1); - axis_size->data[0] = static_cast<int>(::tflite::NumElements(op_context->axis)); - return context->ResizeTensor(context, resolved_axis, axis_size); -} - -// Resizes output array based on the input size and resolved axis. -TfLiteStatus ResizeOutputTensor(TfLiteContext *context, TensorFlowSumOp *op_context) -{ - size_t num_axis = ::tflite::NumElements(op_context->axis); - TfLiteIntArray *input_dims = op_context->input->dims; - int input_num_dims = ::tflite::NumDimensions(op_context->input); - const int *axis = op_context->axis->data.i32; - - { - // Calculates size of reducing axis. - int num_reduce_axis = num_axis; - for (int i = 0; i < num_axis; ++i) - { - int current = axis[i]; - if (current < 0) - { - current += input_num_dims; - } - TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims); - for (int j = 0; j < i; ++j) - { - int previous = axis[j]; - if (previous < 0) - { - previous += input_num_dims; - } - if (current == previous) - { - --num_reduce_axis; - break; - } - } - } - // Determines output dimensions. 
- int output_num_dims = ::tflite::NumDimensions(op_context->output); - TF_LITE_ENSURE(context, (input_num_dims == output_num_dims) || - (input_num_dims - num_reduce_axis == output_num_dims)); - - if (input_num_dims == output_num_dims) - { - TfLiteIntArray *output_dims = TfLiteIntArrayCopy(input_dims); - for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) - { - int current = axis[axis_idx]; - output_dims->data[current] = 1; - } - return context->ResizeTensor(context, op_context->output, output_dims); - } - else - { - TfLiteIntArray *output_dims = TfLiteIntArrayCreate(output_num_dims); - int num_skip_axis = 0; - for (int idx = 0; idx < input_num_dims; ++idx) - { - bool is_axis = false; - for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) - { - if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx) - { - ++num_skip_axis; - is_axis = true; - break; - } - } - if (!is_axis) - { - output_dims->data[idx - num_skip_axis] = input_dims->data[idx]; - } - } - return context->ResizeTensor(context, op_context->output, output_dims); - } - } -} - -// Initializes temp tensors to store index and resolved axis. -TfLiteStatus InitializeTemporaries(TfLiteContext *context, TfLiteNode *node, - TensorFlowSumOp *op_context) -{ - // Creates a temp index to iterate through input data. - int *scratch_tensor_index = reinterpret_cast<int *>(node->user_data); - TfLiteIntArrayFree(node->temporaries); - node->temporaries = TfLiteIntArrayCreate(2); - node->temporaries->data[0] = *scratch_tensor_index; - TfLiteTensor *scratch_tensor = &context->tensors[node->temporaries->data[0]]; - scratch_tensor->type = kTfLiteInt32; - scratch_tensor->allocation_type = kTfLiteArenaRw; - TfLiteIntArray *index_size = TfLiteIntArrayCreate(1); - index_size->data[0] = ::tflite::NumDimensions(op_context->input); - TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_tensor, index_size)); - - // Creates a temp tensor to store resolved axis given input data. 
- node->temporaries->data[1] = *scratch_tensor_index + 1; - TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]]; - resolved_axis->type = kTfLiteInt32; - return kTfLiteOk; -} - -TfLiteStatus PrepareTensorFlowSum(TfLiteContext *context, TfLiteNode *node) -{ - TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2); - TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1); - - TensorFlowSumOp op_context(context, node); - TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context)); - - TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]]; - // Leaves work to Eval if axis is not constant; else resizes output. - if (!::tflite::IsConstantTensor(op_context.axis)) - { - ::tflite::SetTensorToDynamic(op_context.output); - ::tflite::SetTensorToDynamic(resolved_axis); - return kTfLiteOk; - } - resolved_axis->allocation_type = kTfLiteArenaRw; - TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis)); - return ResizeOutputTensor(context, &op_context); -} - -// Gets offset of index if expanded on axis. When expanded, the flattened offset -// will not change, if the output index changes on the given axis. For example, -// if you have a 2D tensor and you are expanding to 3D on axis 0, -// then index (0, 1, 2) and index (1, 1, 2) will map from the same flattened -// offset. 
// Computes the flattened offset in the full tensor `dims` for an index that holds one
// coordinate per dimension that is NOT listed in `axis`. Listed dimensions get
// coordinate 0, so changing the output index on such an axis never changes the offset.
// Passing a null `axis` treats no dimension as listed.
inline size_t ExpandedInputOffset(const int num_dims, const int *dims, const int *index,
                                  const int num_axis, const int *axis)
{
  const auto is_listed = [&](const int dim) -> bool {
    if (axis == nullptr)
    {
      return false;
    }
    for (int k = 0; k < num_axis; ++k)
    {
      if (axis[k] == dim)
      {
        return true;
      }
    }
    return false;
  };

  size_t result = 0;
  int coord_pos = 0; // next coordinate of `index` to consume
  for (int dim = 0; dim < num_dims; ++dim)
  {
    const size_t extent = static_cast<size_t>(dims[dim]);
    if (is_listed(dim))
    {
      result = result * extent;
    }
    else
    {
      result = result * extent + static_cast<size_t>(index[coord_pos]);
      ++coord_pos;
    }
  }
  return result;
}

// Computes the flattened offset of a full-rank `index` once the dimensions listed in
// `axis` are dropped: listed dimensions are ignored, so two indices that differ only on
// a listed axis share one offset. A null `axis` gives the ordinary row-major offset.
// TODO(kanlig): uses Dims to represent dimensions.
inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
                                  const int num_axis, const int *axis)
{
  const auto is_listed = [&](const int dim) -> bool {
    if (axis == nullptr)
    {
      return false;
    }
    for (int k = 0; k < num_axis; ++k)
    {
      if (axis[k] == dim)
      {
        return true;
      }
    }
    return false;
  };

  size_t result = 0;
  for (int dim = 0; dim < num_dims; ++dim)
  {
    if (!is_listed(dim))
    {
      result = result * static_cast<size_t>(dims[dim]) + static_cast<size_t>(index[dim]);
    }
  }
  return result;
}
-inline bool NextIndex(TfLiteContext *context, const int num_dims, const int *dims, int *current) -{ - int carry = 1; - for (int idx = num_dims - 1; idx >= 0; --idx) - { - int current_val = current[idx] + carry; - TF_LITE_ENSURE(context, (dims[idx] >= current_val)); - if (dims[idx] == current_val) - { - current[idx] = 0; - } - else - { - current[idx] = current_val; - carry = 0; - break; - } - } - return (carry == 0); -} - -template <typename T> -inline TfLiteStatus -CustomSum(TfLiteContext *context, T *input_data, const int *input_dims, const int input_num_dims, - T *output_data, const int *output_dims, const int output_num_dims, const int *axis, - const int num_axis_dimensions, bool keep_dims, int *temp_index, int *resolved_axis) -{ - // resolves axis. - int num_resolved_axis = 0; - for (int idx = 0; idx < num_axis_dimensions; ++idx) - { - int current = axis[idx]; - TF_LITE_ENSURE(context, (current < input_num_dims && current + input_num_dims >= 0)); - if (current < 0) - { - current += input_num_dims; - } - bool is_dup = false; - for (int j = 0; j < num_resolved_axis; ++j) - { - if (resolved_axis[j] == current) - { - is_dup = true; - break; - } - } - if (!is_dup) - { - resolved_axis[num_resolved_axis++] = current; - } - } - - TF_LITE_ENSURE(context, (input_num_dims > 0)); - TF_LITE_ENSURE(context, (input_dims != nullptr)); - TF_LITE_ENSURE(context, (temp_index != nullptr)); - - // resets output data. - for (int idx = 0; idx < output_num_dims; ++idx) - { - temp_index[idx] = 0; - } - for (bool has_next = true; has_next; - has_next = NextIndex(context, output_num_dims, output_dims, temp_index)) - { - size_t output_offset = - ReducedOutputOffset(output_num_dims, output_dims, temp_index, 0, nullptr); - output_data[output_offset] = 0; - } - - // resets temp index. - for (int idx = 0; idx < input_num_dims; ++idx) - { - temp_index[idx] = 0; - } - - // iterates through input_data. 
- for (bool has_next = true; has_next; - has_next = NextIndex(context, input_num_dims, input_dims, temp_index)) - { - size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr); - size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, - num_resolved_axis, resolved_axis); - output_data[output_offset] += input_data[input_offset]; - } - - return kTfLiteOk; -} - -TfLiteStatus EvalTensorFlowSum(TfLiteContext *context, TfLiteNode *node) -{ - - TensorFlowSumOp op_context(context, node); - int num_axis = static_cast<int>(::tflite::NumElements(op_context.axis)); - TfLiteTensor *temp_index = &context->tensors[node->temporaries->data[0]]; - TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]]; - // Resize the output tensor if the output tensor is dynamic. - if (::tflite::IsDynamicTensor(op_context.output)) - { - TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis)); - TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); - } - - TfLiteStatus returnStatus = kTfLiteOk; - switch (op_context.input->type) - { - case kTfLiteFloat32: - returnStatus = CustomSum<float>( - context, op_context.input->data.f, op_context.input->dims->data, - op_context.input->dims->size, op_context.output->data.f, op_context.output->dims->data, - op_context.output->dims->size, op_context.axis->data.i32, num_axis, false, - temp_index->data.i32, resolved_axis->data.i32); - break; - case kTfLiteInt32: - returnStatus = CustomSum<int>(context, op_context.input->data.i32, - op_context.input->dims->data, op_context.input->dims->size, - op_context.output->data.i32, op_context.output->dims->data, - op_context.output->dims->size, op_context.axis->data.i32, - num_axis, false, temp_index->data.i32, resolved_axis->data.i32); - break; - case kTfLiteUInt8: - returnStatus = CustomSum<uint8_t>( - context, op_context.input->data.uint8, op_context.input->dims->data, - op_context.input->dims->size, 
op_context.output->data.uint8, - op_context.output->dims->data, op_context.output->dims->size, op_context.axis->data.i32, - num_axis, false, temp_index->data.i32, resolved_axis->data.i32); - break; - case kTfLiteInt64: - returnStatus = CustomSum<int64_t>( - context, op_context.input->data.i64, op_context.input->dims->data, - op_context.input->dims->size, op_context.output->data.i64, op_context.output->dims->data, - op_context.output->dims->size, op_context.axis->data.i32, num_axis, false, - temp_index->data.i32, resolved_axis->data.i32); - break; - default: - returnStatus = kTfLiteError; - } - - return returnStatus; -} - -} // namespace TensorFlowSum -} // namespace custom -} // namespace tflite -} // namespace nnfw diff --git a/libs/tflite/src/ext/kernels/register.cpp b/libs/tflite/src/ext/kernels/register.cpp deleted file mode 100644 index b822bd616..000000000 --- a/libs/tflite/src/ext/kernels/register.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -// NOTE To minimize diff with upstream tensorflow, disable clang-format -// clang-format off - -// NOTE This code is derived from the following file (in TensorFlow) -// 'externals/tensorflow/tensorflow/contrib/lite/kernels/register.cc' -#include "tflite/ext/kernels/register.h" -#include "tflite/ext/kernels/CustomOps.h" - -namespace tflite { -namespace ops { -namespace builtin { - -TfLiteRegistration *Register_RELU(); -TfLiteRegistration *Register_RELU_N1_TO_1(); -TfLiteRegistration *Register_RELU6(); -TfLiteRegistration *Register_TANH(); -TfLiteRegistration *Register_LOGISTIC(); -TfLiteRegistration *Register_AVERAGE_POOL_2D(); -TfLiteRegistration *Register_MAX_POOL_2D(); -TfLiteRegistration *Register_L2_POOL_2D(); -TfLiteRegistration *Register_CONV_2D(); -TfLiteRegistration *Register_DEPTHWISE_CONV_2D(); -TfLiteRegistration *Register_SVDF(); -TfLiteRegistration *Register_RNN(); -TfLiteRegistration *Register_BIDIRECTIONAL_SEQUENCE_RNN(); -TfLiteRegistration *Register_UNIDIRECTIONAL_SEQUENCE_RNN(); -TfLiteRegistration *Register_EMBEDDING_LOOKUP(); -TfLiteRegistration *Register_EMBEDDING_LOOKUP_SPARSE(); -TfLiteRegistration *Register_FULLY_CONNECTED(); -TfLiteRegistration *Register_LSH_PROJECTION(); -TfLiteRegistration *Register_HASHTABLE_LOOKUP(); -TfLiteRegistration *Register_SOFTMAX(); -TfLiteRegistration *Register_CONCATENATION(); -TfLiteRegistration *Register_ADD(); -TfLiteRegistration *Register_SPACE_TO_BATCH_ND(); -TfLiteRegistration *Register_DIV(); -TfLiteRegistration *Register_SUB(); -TfLiteRegistration *Register_BATCH_TO_SPACE_ND(); -TfLiteRegistration *Register_MUL(); -TfLiteRegistration *Register_L2_NORMALIZATION(); -TfLiteRegistration *Register_LOCAL_RESPONSE_NORMALIZATION(); -TfLiteRegistration *Register_LSTM(); -TfLiteRegistration *Register_BIDIRECTIONAL_SEQUENCE_LSTM(); -TfLiteRegistration *Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); -TfLiteRegistration *Register_PAD(); 
-TfLiteRegistration *Register_PADV2(); -TfLiteRegistration *Register_RESHAPE(); -TfLiteRegistration *Register_RESIZE_BILINEAR(); -TfLiteRegistration *Register_SKIP_GRAM(); -TfLiteRegistration *Register_SPACE_TO_DEPTH(); -TfLiteRegistration *Register_GATHER(); -TfLiteRegistration *Register_TRANSPOSE(); -TfLiteRegistration *Register_MEAN(); -TfLiteRegistration *Register_SPLIT(); -TfLiteRegistration *Register_SQUEEZE(); -TfLiteRegistration *Register_STRIDED_SLICE(); -TfLiteRegistration *Register_EXP(); -TfLiteRegistration *Register_TOPK_V2(); -TfLiteRegistration *Register_LOG_SOFTMAX(); -TfLiteRegistration *Register_CAST(); -TfLiteRegistration *Register_DEQUANTIZE(); -TfLiteRegistration *Register_PRELU(); -TfLiteRegistration *Register_MAXIMUM(); -TfLiteRegistration *Register_MINIMUM(); -TfLiteRegistration *Register_ARG_MAX(); -TfLiteRegistration *Register_GREATER(); -TfLiteRegistration *Register_GREATER_EQUAL(); -TfLiteRegistration *Register_LESS(); -TfLiteRegistration *Register_LESS_EQUAL(); -TfLiteRegistration *Register_FLOOR(); -TfLiteRegistration *Register_NEG(); -TfLiteRegistration *Register_SELECT(); -TfLiteRegistration *Register_SLICE(); -TfLiteRegistration *Register_SIN(); -TfLiteRegistration *Register_TRANSPOSE_CONV(); -TfLiteRegistration *Register_SPARSE_TO_DENSE(); -#ifndef OBS_BUILD -TfLiteRegistration *Register_SUM(); -TfLiteRegistration *Register_REDUCE_MAX(); -TfLiteRegistration *Register_REDUCE_MIN(); -TfLiteRegistration *Register_EQUAL(); -TfLiteRegistration *Register_NOT_EQUAL(); -TfLiteRegistration *Register_SQRT(); -TfLiteRegistration *Register_RSQRT(); -TfLiteRegistration *Register_SHAPE(); -TfLiteRegistration *Register_POW(); -TfLiteRegistration *Register_FAKE_QUANT(); -TfLiteRegistration *Register_PACK(); -TfLiteRegistration *Register_ONE_HOT(); -TfLiteRegistration *Register_LOGICAL_OR(); -TfLiteRegistration *Register_LOGICAL_AND(); -TfLiteRegistration *Register_LOGICAL_NOT(); -TfLiteRegistration *Register_UNPACK(); -TfLiteRegistration 
*Register_FLOOR_DIV(); -TfLiteRegistration *Register_SQUARE(); -TfLiteRegistration *Register_ZEROS_LIKE(); -#endif // OBS_BUILD - -} // namespace builtin -} // namespace ops -} // namespace tflite - -namespace nnfw { -namespace tflite { - -BuiltinOpResolver::BuiltinOpResolver() -{ - // Using namespace directive to minimize diff with upstream tensorflow - using namespace ::tflite::ops::builtin; - using namespace ::tflite; - - AddBuiltin(BuiltinOperator_RELU, Register_RELU()); - AddBuiltin(BuiltinOperator_RELU_N1_TO_1, Register_RELU_N1_TO_1()); - AddBuiltin(BuiltinOperator_RELU6, Register_RELU6()); - AddBuiltin(BuiltinOperator_TANH, Register_TANH()); - AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC()); - AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D()); - AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D()); - AddBuiltin(BuiltinOperator_L2_POOL_2D, Register_L2_POOL_2D()); - AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D()); - AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D()); - AddBuiltin(BuiltinOperator_SVDF, Register_SVDF()); - AddBuiltin(BuiltinOperator_RNN, Register_RNN()); - AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, Register_BIDIRECTIONAL_SEQUENCE_RNN()); - AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, Register_UNIDIRECTIONAL_SEQUENCE_RNN()); - AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP, Register_EMBEDDING_LOOKUP()); - AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, Register_EMBEDDING_LOOKUP_SPARSE()); - AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED()); - AddBuiltin(BuiltinOperator_LSH_PROJECTION, Register_LSH_PROJECTION()); - AddBuiltin(BuiltinOperator_HASHTABLE_LOOKUP, Register_HASHTABLE_LOOKUP()); - AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX()); - AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION()); - AddBuiltin(BuiltinOperator_ADD, Register_ADD()); - AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, 
Register_SPACE_TO_BATCH_ND()); - AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND()); - AddBuiltin(BuiltinOperator_MUL, Register_MUL()); - AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION()); - AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, Register_LOCAL_RESPONSE_NORMALIZATION()); - AddBuiltin(BuiltinOperator_LSTM, Register_LSTM()); - AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, Register_BIDIRECTIONAL_SEQUENCE_LSTM()); - AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, Register_UNIDIRECTIONAL_SEQUENCE_LSTM()); - AddBuiltin(BuiltinOperator_PAD, Register_PAD()); - AddBuiltin(BuiltinOperator_PADV2, Register_PADV2()); - AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE()); - AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR()); - AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM()); - AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH()); - AddBuiltin(BuiltinOperator_GATHER, Register_GATHER()); - AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE()); - AddBuiltin(BuiltinOperator_MEAN, Register_MEAN()); - AddBuiltin(BuiltinOperator_DIV, Register_DIV()); - AddBuiltin(BuiltinOperator_SUB, Register_SUB()); - AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT()); - AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE()); - AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE()); - AddBuiltin(BuiltinOperator_EXP, Register_EXP()); - AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2()); - AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX()); - AddBuiltin(BuiltinOperator_CAST, Register_CAST()); - AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); - AddBuiltin(BuiltinOperator_PRELU, Register_PRELU()); - AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM()); - AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM()); - AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX()); - AddBuiltin(BuiltinOperator_GREATER, 
Register_GREATER()); - AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL()); - AddBuiltin(BuiltinOperator_LESS, Register_LESS()); - AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL()); - AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR()); - AddBuiltin(BuiltinOperator_NEG, Register_NEG()); - AddBuiltin(BuiltinOperator_SELECT, Register_SELECT()); - AddBuiltin(BuiltinOperator_SLICE, Register_SLICE()); - AddBuiltin(BuiltinOperator_SIN, Register_SIN()); -#ifndef OBS_BUILD - AddBuiltin(BuiltinOperator_SUM, Register_SUM()); - AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX()); - AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN()); - AddBuiltin(BuiltinOperator_TRANSPOSE_CONV, Register_TRANSPOSE_CONV()); - AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE()); - AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL()); - AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL()); - AddBuiltin(BuiltinOperator_SQRT, Register_SQRT()); - AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT()); - AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE()); - AddBuiltin(BuiltinOperator_POW, Register_POW()); - AddBuiltin(BuiltinOperator_FAKE_QUANT, Register_FAKE_QUANT(), 1, 2); - AddBuiltin(BuiltinOperator_PACK, Register_PACK()); - AddBuiltin(BuiltinOperator_ONE_HOT, Register_ONE_HOT()); - AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR()); - AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND()); - AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT()); - AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK()); - AddBuiltin(BuiltinOperator_FLOOR_DIV, Register_FLOOR_DIV()); - AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE()); - AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE()); -#endif // OBS_BUILD - - AddCustom("TensorFlowMax", nnfw::tflite::custom::Register_TensorFlowMax()); - AddCustom("SquaredDifference", nnfw::tflite::custom::Register_SquaredDifference()); - AddCustom("TensorFlowSum", 
nnfw::tflite::custom::Register_TensorFlowSum()); - AddCustom("Abs", nnfw::tflite::custom::Register_Abs()); -} - -} // namespace tflite -} // namespace nnfw diff --git a/libs/tflite/src/ext/nnapi_delegate.cpp b/libs/tflite/src/ext/nnapi_delegate.cpp deleted file mode 100644 index 25858a7b4..000000000 --- a/libs/tflite/src/ext/nnapi_delegate.cpp +++ /dev/null @@ -1,1209 +0,0 @@ -/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -// NOTE To minimize diff with upstream tensorflow, disable clang-format -// clang-format off - -// NOTE This code is derived from the following file (in TensorFlow v1.12) -// 'externals/tensorflow/tensorflow/contrib/lite/nnapi_delegate.cc' -#include "tflite/ext/nnapi_delegate.h" -#include <fcntl.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/types.h> -#ifdef OBS_BUILD -#include "tensorflow/contrib/lite/builtin_op_data.h" -#include "tensorflow/contrib/lite/error_reporter.h" -#else -#include "tensorflow/contrib/lite/c/builtin_op_data.h" -#include "tensorflow/contrib/lite/core/api/error_reporter.h" -#endif -#include "tensorflow/contrib/lite/model.h" -#include "NeuralNetworksShim.h" -#include "NeuralNetworksExShim.h" - -#ifdef __ANDROID__ -#include <android/log.h> -#include <sys/system_properties.h> -#endif - -namespace nnfw { -namespace tflite { - -void logError(const char* format, ...) { - // stderr is convenient for native tests, but is not captured for apps - va_list args_for_stderr; - va_start(args_for_stderr, format); - vfprintf(stderr, format, args_for_stderr); - va_end(args_for_stderr); - fprintf(stderr, "\n"); - fflush(stderr); -#ifdef __ANDROID__ - // produce logcat output for general consumption - va_list args_for_log; - va_start(args_for_log, format); - __android_log_vprint(ANDROID_LOG_ERROR, "tflite", format, args_for_log); - va_end(args_for_log); -#endif -} - -#define FATAL(...) \ - logError(__VA_ARGS__); \ - exit(1); - -// TODO(aselle): Change the error model to use status codes. 
-#define CHECK_TFLITE_SUCCESS(x) \ - if (x != kTfLiteOk) { \ - FATAL("Aborting since tflite returned failure nnapi_delegate.cc:%d.", \ - __LINE__); \ - } - -#define CHECK_NN(x) \ - if (x != ANEURALNETWORKS_NO_ERROR) { \ - FATAL("Aborting since NNAPI returned failure nnapi_delegate.cc:%d", \ - __LINE__); \ - } - -#define RETURN_ERROR_IF_TFLITE_FAILED(x) \ - if (x != kTfLiteOk) { \ - logError( \ - "Returning error since TFLite returned failure nnapi_delegate.cc:%d.", \ - __LINE__); \ - return kTfLiteError; \ - } - -#define RETURN_ERROR_IF_NN_FAILED(x) \ - if (x != ANEURALNETWORKS_NO_ERROR) { \ - logError( \ - "Returning error since NNAPI returned failure nnapi_delegate.cc:%d.", \ - __LINE__); \ - return kTfLiteError; \ - } - -// Tracking of NNAPI operand ids -static const int64_t kOperandIdNotSet = -1; -static const int64_t kOperandNotNeeded = -2; - -namespace { - -int32_t GetAndroidSdkVersion() { -#ifdef __ANDROID__ - const char* sdkProp = "ro.build.version.sdk"; - char sdkVersion[PROP_VALUE_MAX]; - int length = __system_property_get(sdkProp, sdkVersion); - if (length != 0) { - for (int i = 0; i < length; ++i) { - int digit = sdkVersion[i] - '0'; - if (digit < 0 || digit > 9) { - // Non-numeric SDK version, assume it's higher then expected; - return 0xFFFF; - } - } - return atoi(sdkVersion); - } - FATAL("No %s prop", sdkProp); -#endif // __ANDROID__ - return 0; -} - -int32_t GetAndroidSdkVersionCached() { - static int32_t androidSdkVersion = GetAndroidSdkVersion(); - return androidSdkVersion; -} - -static const uint32_t dimension_for_scalar[1] = {1}; - -} // namespace - -NNAPIAllocation::NNAPIAllocation(const char* filename, - ::tflite::ErrorReporter* error_reporter) - : MMAPAllocation(filename, error_reporter) { - if (mmapped_buffer_ != MAP_FAILED) - CHECK_NN(ANeuralNetworksMemory_createFromFd(buffer_size_bytes_, PROT_READ, - mmap_fd_, 0, &handle_)); -} - -NNAPIAllocation::~NNAPIAllocation() { - if (handle_) { - ANeuralNetworksMemory_free(handle_); - } -} - 
-NNAPIDelegate::~NNAPIDelegate() { - if (nn_compiled_model_) { - ANeuralNetworksCompilation_free(nn_compiled_model_); - nn_compiled_model_ = nullptr; - } - if (nn_model_) { - ANeuralNetworksModel_free(nn_model_); - nn_model_ = nullptr; - // TODO(aselle): Is this thread-safe and callable multiple times? - } - // ANeuralNetworksShutdown(); -} - -// Adds the tensors of the interpreter to the NN API model. -TfLiteStatus addTensorOperands(::tflite::Interpreter* interpreter, - ANeuralNetworksModel* nn_model, - uint32_t* no_of_operands_added, - std::vector<int64_t>* nnapi_ids) { - uint32_t next_id = 0; - for (size_t i = 0; i < interpreter->tensors_size(); i++) { - // Skip temporaries and RNN back-edges. - if ((*nnapi_ids)[i] == kOperandNotNeeded) continue; - - (*nnapi_ids)[i] = int64_t(next_id); - - int32_t nn_type = 0; - // NNAPI requires 32-bit float scale to be zero, tflite doesn't care - float scale = 0.0f; - int32_t zeroPoint = 0; - TfLiteTensor* tensor = interpreter->tensor(i); - switch (tensor->type) { - case kTfLiteNoType: - // Tensors added during initialization of Ops don't have a type yet and - // should not be registered with the NNAPI. - continue; - case kTfLiteFloat32: - nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; - break; - case kTfLiteUInt8: - nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; - scale = tensor->params.scale; - // FIXME The next line is a workaround because currently zero scale is - // passed down from TF - // Lite. Note that the latest NeuralNetworks.h (see - // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/runtime/include/NeuralNetworks.h) - // requires scale to be greater than zero. Remove this workaround - // when the scale - // value is correctly passed. - scale = (scale == 0.0f) ? 
1.0f : scale; - zeroPoint = tensor->params.zero_point; - break; - case kTfLiteInt32: - nn_type = ANEURALNETWORKS_TENSOR_INT32; - scale = tensor->params.scale; - zeroPoint = tensor->params.zero_point; - break; - case kTfLiteBool: - // Workaround to pass bool type under NNAPI - // Use bool type using ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with scale = 1.0f and zero_point = 0 - nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; - scale = 1.0f; - zeroPoint = 0; - break; - default: - logError("Unsupported tensor type %d", tensor->type); - return kTfLiteError; - } - if (tensor->dims->size == 0) { - // WORKAROUND Some model have dimension zero - switch (tensor->type) { - case kTfLiteFloat32: - nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; - break; - case kTfLiteInt32: - nn_type = ANEURALNETWORKS_TENSOR_INT32; - break; - default: - logError("NNAPI doesn't support tensors with rank 0 (index %d name %s)", - i, tensor->name); - return kTfLiteError; - } - } - if (tensor->dims->size > 4) { - logError("NNAPI doesn't support tensors with rank > 4 (index %d name %s)", - i, tensor->name); - return kTfLiteError; - } - // TODO(aselle): Note, many of these are intermediate results. Do I need - // to ever specify these sizes. I am currently below doing setValue - // on all of them, but I shouldn't in the future. - // Answer(jeanluc): If all the operators can set the dimension correctly, - // you won't need to. 
- ANeuralNetworksOperandType operand_type{ - nn_type, static_cast<uint32_t>(tensor->dims->size), - reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint}; - if (tensor->dims->size == 0) { - // WORKAROUND Some model have dimension zero - // Consider scalar as vector size 1 - operand_type.dimensions = dimension_for_scalar; - operand_type.dimensionCount = 1; - } - RETURN_ERROR_IF_NN_FAILED( - ANeuralNetworksModel_addOperand(nn_model, &operand_type)); - // TODO(aselle): Based on Michael's suggestion, limiting this to read - // only memory - if (tensor->allocation_type == kTfLiteMmapRo) { - if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>( - static_cast<const ::tflite::Allocation*>(tensor->allocation))) { - RETURN_ERROR_IF_NN_FAILED( - ANeuralNetworksModel_setOperandValueFromMemory( - nn_model, next_id, alloc->memory(), - alloc->offset(tensor->data.raw), tensor->bytes)); - } else { - RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue( - nn_model, next_id, tensor->data.raw, tensor->bytes)); - } - } else if (tensor->bytes == 0) { - // These size 0 tensors are optional tensors reserved. - RETURN_ERROR_IF_NN_FAILED( - ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0)); - } - - ++next_id; - } - *no_of_operands_added = next_id; - return kTfLiteOk; -} - -void MapAndAddTensorIds(const int* from_ids_buf, size_t from_ids_count, - std::vector<uint32_t>* into, - const std::vector<int64_t>& map) { - for (size_t i = 0; i < from_ids_count; i++) { - int from_id = from_ids_buf[i]; - if (from_id == kOptionalTensor) { - into->push_back(from_id); - } else { - into->push_back(map[from_id]); - } - } -} - -// Adds the operations and their parameters to the NN API model. -// 'next-id' is the operand ID of the next operand of the model. 
-TfLiteStatus AddOpsAndParams( - ::tflite::Interpreter* interpreter, ANeuralNetworksModel* nn_model, - uint32_t next_id, std::vector<int>* model_state_inputs, - std::vector<int>* model_state_outputs, - const std::vector<int64_t>& tensor_id_to_nnapi_id) { - for (size_t i = 0; i < interpreter->nodes_size(); i++) { - const auto* node_and_registration = interpreter->node_and_registration(i); - const TfLiteNode& node = node_and_registration->first; - const TfLiteRegistration& registration = node_and_registration->second; - ::tflite::BuiltinOperator builtin = - static_cast<::tflite::BuiltinOperator>(registration.builtin_code); - - // Add the parameters. - std::vector<uint32_t> augmented_inputs, augmented_outputs; - MapAndAddTensorIds(node.inputs->data, node.inputs->size, &augmented_inputs, - tensor_id_to_nnapi_id); - MapAndAddTensorIds(node.outputs->data, node.outputs->size, - &augmented_outputs, tensor_id_to_nnapi_id); - - auto add_scalar_int32 = [&nn_model, &augmented_inputs, - &next_id](int value) { - ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_INT32}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) - CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value, - sizeof(int32_t))) - augmented_inputs.push_back(next_id++); - }; - - auto add_scalar_float32 = [&nn_model, &augmented_inputs, - &next_id](float value) { - ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_FLOAT32}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) - CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value, - sizeof(float))) - augmented_inputs.push_back(next_id++); - }; - - auto add_vector_int32 = [&](const int* values, uint32_t num_values) { - ANeuralNetworksOperandType operand_type{ - .type = ANEURALNETWORKS_TENSOR_INT32, - .dimensionCount = 1, - .dimensions = &num_values}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) - CHECK_NN(ANeuralNetworksModel_setOperandValue( - 
nn_model, next_id, values, sizeof(int32_t) * num_values)); - augmented_inputs.push_back(next_id++); - }; - - // Handle state tensors of RNN, LSTM, SVDF. - // For each state_out tensor, a corresponding state_in operand needs to be - // created for NNAPI. - auto duplicate_state_tensor_float32 = - [interpreter, &nn_model, &next_id, &augmented_inputs, - &model_state_inputs, &model_state_outputs](int tensor_id) { - const TfLiteTensor* tensor = interpreter->tensor(tensor_id); - ANeuralNetworksOperandType operand_type{ - ANEURALNETWORKS_TENSOR_FLOAT32, - static_cast<uint32_t>(tensor->dims->size), - reinterpret_cast<uint32_t*>(tensor->dims->data), - tensor->params.scale, tensor->params.zero_point}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); - augmented_inputs.push_back(next_id); - model_state_inputs->push_back(next_id); - model_state_outputs->push_back(tensor_id); - next_id++; - }; - auto check_and_add_activation = [&add_scalar_int32](int activation) { - if (activation > kTfLiteActRelu6) { - logError("NNAPI only supports RELU, RELU1 and RELU6 activations"); - return kTfLiteError; - } - add_scalar_int32(activation); - return kTfLiteOk; - }; - - auto add_add_params = [&add_scalar_int32](void* data) { - auto* builtin = reinterpret_cast<TfLiteAddParams*>(data); - if (builtin->activation > kTfLiteActRelu6) { - logError("NNAPI only supports RELU, RELU1 and RELU6 activations"); - return kTfLiteError; - } - add_scalar_int32(builtin->activation); - return kTfLiteOk; - }; - - auto add_pooling_params = [&add_scalar_int32, - &check_and_add_activation](void* data) { - auto builtin = reinterpret_cast<TfLitePoolParams*>(data); - add_scalar_int32(builtin->padding); - add_scalar_int32(builtin->stride_width); - add_scalar_int32(builtin->stride_height); - add_scalar_int32(builtin->filter_width); - add_scalar_int32(builtin->filter_height); - return check_and_add_activation(builtin->activation); - }; - - auto add_convolution_params = [&add_scalar_int32, - 
&check_and_add_activation](void* data) { - auto builtin = reinterpret_cast<TfLiteConvParams*>(data); - add_scalar_int32(builtin->padding); - add_scalar_int32(builtin->stride_width); - add_scalar_int32(builtin->stride_height); - return check_and_add_activation(builtin->activation); - }; - - auto add_depthwise_conv_params = [&add_scalar_int32, - &check_and_add_activation](void* data) { - auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data); - add_scalar_int32(builtin->padding); - add_scalar_int32(builtin->stride_width); - add_scalar_int32(builtin->stride_height); - add_scalar_int32(builtin->depth_multiplier); - return check_and_add_activation(builtin->activation); - }; - - auto add_fully_connected_params = [&check_and_add_activation](void* data) { - auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data); - return check_and_add_activation(builtin->activation); - }; - - auto add_concatenation_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(data); - add_scalar_int32(builtin->axis); - if (builtin->activation != kTfLiteActNone) { - logError("Concatenation does not support fused activation in NNAPI"); - return kTfLiteError; - } - return kTfLiteOk; - }; - - auto add_softmax_params = [&add_scalar_float32](void* data) { - auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(data); - add_scalar_float32(builtin->beta); - }; - - auto add_space_to_depth_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(data); - add_scalar_int32(builtin->block_size); - }; - - auto add_lstm_params = [&add_scalar_int32, - &add_scalar_float32](void* data) { - auto builtin = reinterpret_cast<TfLiteLSTMParams*>(data); - add_scalar_int32(builtin->activation); - add_scalar_float32(builtin->cell_clip); - add_scalar_float32(builtin->proj_clip); - }; - - // LSTM in NNAPI requires scratch tensor as an output operand. 
- auto add_lstm_scratch_tensor_float32 = [interpreter, &node, &nn_model, - &next_id, &augmented_outputs]() { - if (node.temporaries->size == 0) return; - int scratch_buffer_index = node.temporaries->data[0]; - const TfLiteTensor* tensor = interpreter->tensor(scratch_buffer_index); - ANeuralNetworksOperandType operand_type{ - ANEURALNETWORKS_TENSOR_FLOAT32, - static_cast<uint32_t>(tensor->dims->size), - reinterpret_cast<uint32_t*>(tensor->dims->data), tensor->params.scale, - tensor->params.zero_point}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); - augmented_outputs.insert(augmented_outputs.begin(), next_id++); - }; - - auto add_mean_params = [&add_scalar_int32](void* data) { -#ifdef OBS_BUILD - auto builtin = reinterpret_cast<TfLiteMeanParams*>(data); -#else - auto builtin = reinterpret_cast<TfLiteReducerParams*>(data); -#endif - add_scalar_int32(builtin->keep_dims); - }; - - auto add_svdf_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLiteSVDFParams*>(data); - add_scalar_int32(builtin->rank); - add_scalar_int32(builtin->activation); - }; - - auto add_rnn_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLiteRNNParams*>(data); - add_scalar_int32(builtin->activation); - }; - - auto add_squeeze_params = [&](void* data) { - const auto* builtin = reinterpret_cast<TfLiteSqueezeParams*>(data); - // Note that we add the squeeze dimensions even if the dimensions were - // unspecified (empty), as NNAPI requires the operand. - add_vector_int32(builtin->squeeze_dims, - static_cast<uint32_t>(builtin->num_squeeze_dims)); - }; - - // Handle optional input tensors. 
- auto add_optional_tensors = [&nn_model, &augmented_inputs, - &next_id](int nn_type) { - for (size_t idx = 0; idx < augmented_inputs.size(); idx++) { - if (augmented_inputs[idx] == kOptionalTensor) { - const std::vector<uint32_t> dim = {0, 0}; - ANeuralNetworksOperandType operand_type{nn_type, 2, dim.data(), 0, 0}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) - CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, - nullptr, 0)) - augmented_inputs[idx] = next_id++; - } - } - }; - - int nnapi_version = 10; -#include "nnapi_delegate_ex_AddOpsAndParams_lambda.inc" - - ANeuralNetworksOperationType nn_op_type; - - // Using namespace directive to minimize diff with upstream tensorflow - namespace tflite = ::tflite; - - switch (builtin) { - case tflite::BuiltinOperator_ADD: - nn_op_type = ANEURALNETWORKS_ADD; - RETURN_ERROR_IF_TFLITE_FAILED(add_add_params(node.builtin_data)); - break; - case tflite::BuiltinOperator_MUL: - nn_op_type = ANEURALNETWORKS_MUL; - RETURN_ERROR_IF_TFLITE_FAILED(add_add_params(node.builtin_data)); - break; - case tflite::BuiltinOperator_AVERAGE_POOL_2D: - RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data)); - nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D; - break; - case tflite::BuiltinOperator_MAX_POOL_2D: - RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data)); - nn_op_type = ANEURALNETWORKS_MAX_POOL_2D; - break; - case tflite::BuiltinOperator_L2_POOL_2D: - RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data)); - nn_op_type = ANEURALNETWORKS_L2_POOL_2D; - break; - case tflite::BuiltinOperator_CONV_2D: { - auto builtin = reinterpret_cast<TfLiteConvParams*>(node.builtin_data); - if (builtin->dilation_width_factor != 1 || - builtin->dilation_height_factor != 1 || node.inputs->size != 3) { - logError("NNAPI does not support dilated Conv2D."); - return kTfLiteError; - } - } - RETURN_ERROR_IF_TFLITE_FAILED( - add_convolution_params(node.builtin_data)); - nn_op_type = 
ANEURALNETWORKS_CONV_2D; - break; - case tflite::BuiltinOperator_RELU: - nn_op_type = ANEURALNETWORKS_RELU; - break; - case tflite::BuiltinOperator_RELU_N1_TO_1: - nn_op_type = ANEURALNETWORKS_RELU1; - break; - case tflite::BuiltinOperator_RELU6: - nn_op_type = ANEURALNETWORKS_RELU6; - break; - case tflite::BuiltinOperator_TANH: - nn_op_type = ANEURALNETWORKS_TANH; - break; - case tflite::BuiltinOperator_FLOOR: - nn_op_type = ANEURALNETWORKS_FLOOR; - break; - case tflite::BuiltinOperator_LOGISTIC: - nn_op_type = ANEURALNETWORKS_LOGISTIC; - break; - case tflite::BuiltinOperator_DEPTHWISE_CONV_2D: - RETURN_ERROR_IF_TFLITE_FAILED( - add_depthwise_conv_params(node.builtin_data)); - nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D; - break; - case tflite::BuiltinOperator_CONCATENATION: - RETURN_ERROR_IF_TFLITE_FAILED( - add_concatenation_params(node.builtin_data)); - nn_op_type = ANEURALNETWORKS_CONCATENATION; - break; - case tflite::BuiltinOperator_SOFTMAX: - add_softmax_params(node.builtin_data); - nn_op_type = ANEURALNETWORKS_SOFTMAX; - break; - case tflite::BuiltinOperator_FULLY_CONNECTED: - RETURN_ERROR_IF_TFLITE_FAILED( - add_fully_connected_params(node.builtin_data)); - nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED; - break; - case tflite::BuiltinOperator_RESHAPE: - if (node.inputs->size != 2) { - logError("NNAPI only supports 2-input RESHAPE"); - return kTfLiteError; - } - nn_op_type = ANEURALNETWORKS_RESHAPE; - // add_reshape_params(node.builtin_data); - break; - case tflite::BuiltinOperator_RESIZE_BILINEAR: - add_resize_bilinear_params(node.builtin_data); - nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR; - break; - case tflite::BuiltinOperator_SPACE_TO_DEPTH: - add_space_to_depth_params(node.builtin_data); - nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH; - break; - case tflite::BuiltinOperator_LSTM: { - if (node.inputs->size + /* no of params */ 3 != 21) { - logError("NNAPI only supports 21-input LSTMs"); - return kTfLiteError; - } - duplicate_state_tensor_float32( - 
node.outputs->data[/*kOutputStateTensor*/ 0]); - duplicate_state_tensor_float32( - node.outputs->data[/*kCellStateTensor*/ 1]); - add_lstm_params(node.builtin_data); - add_lstm_scratch_tensor_float32(); - add_optional_tensors(ANEURALNETWORKS_TENSOR_FLOAT32); - nn_op_type = ANEURALNETWORKS_LSTM; - break; - } - case tflite::BuiltinOperator_DEQUANTIZE: - nn_op_type = ANEURALNETWORKS_DEQUANTIZE; - break; - case tflite::BuiltinOperator_SVDF: { - duplicate_state_tensor_float32(node.outputs->data[/*kStateTensor*/ 0]); - add_svdf_params(node.builtin_data); - nn_op_type = ANEURALNETWORKS_SVDF; - break; - } - case tflite::BuiltinOperator_RNN: { - duplicate_state_tensor_float32( - node.outputs->data[/*kHiddenStateTensor*/ 0]); - add_rnn_params(node.builtin_data); - nn_op_type = ANEURALNETWORKS_RNN; - break; - } - case tflite::BuiltinOperator_EMBEDDING_LOOKUP: - nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP; - break; - case tflite::BuiltinOperator_PAD: - nnapi_version = 11; // require NNAPI 1.1 - nn_op_type = ANEURALNETWORKS_PAD; - break; - case tflite::BuiltinOperator_MEAN: - nnapi_version = 11; // require NNAPI 1.1 - add_mean_params(node.builtin_data); - nn_op_type = ANEURALNETWORKS_MEAN; - break; - case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: - nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION; - add_lrn_params(node.builtin_data); - break; - case tflite::BuiltinOperator_DIV: - nnapi_version = 11; // require NNAPI 1.1 - nn_op_type = ANEURALNETWORKS_DIV; - RETURN_ERROR_IF_TFLITE_FAILED(check_and_add_activation( - reinterpret_cast<TfLiteDivParams*>(node.builtin_data)->activation)); - break; - case tflite::BuiltinOperator_SUB: - nnapi_version = 11; // require NNAPI 1.1 - nn_op_type = ANEURALNETWORKS_SUB; - RETURN_ERROR_IF_TFLITE_FAILED(check_and_add_activation( - reinterpret_cast<TfLiteSubParams*>(node.builtin_data)->activation)); - break; - case tflite::BuiltinOperator_SQUEEZE: - nnapi_version = 11; // requires NNAPI 1.1 - 
add_squeeze_params(node.builtin_data); - nn_op_type = ANEURALNETWORKS_SQUEEZE; - break; - case tflite::BuiltinOperator_TRANSPOSE: - // The permutation input tensor value dictates the output dimensions. - // TODO(b/110888333): Support dynamically-sized tensors in delegates. - if ((node.inputs->size > 1) && - (interpreter->tensor(node.inputs->data[1])->allocation_type != - kTfLiteMmapRo)) { - logError("NNAPI does not yet support dynamic tensors."); - return kTfLiteError; - } - nnapi_version = 11; // require NNAPI 1.1 - nn_op_type = ANEURALNETWORKS_TRANSPOSE; - break; - case tflite::BuiltinOperator_L2_NORMALIZATION: - nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION; - if (reinterpret_cast<TfLiteL2NormParams*>(node.builtin_data) - ->activation != kTfLiteActNone) { - logError( - "NNAPI does not support L2Normalization with fused activations"); - return kTfLiteError; - } - if ((node.inputs->size > 0) && - (interpreter->tensor(node.inputs->data[0])->dims->size != 4)) { - logError("NNAPI only supports input rank 4 for L2Normalization"); - return kTfLiteError; - } - break; - case tflite::BuiltinOperator_HASHTABLE_LOOKUP: - if (interpreter->tensor(node.outputs->data[0])->type != - kTfLiteFloat32) { - logError("NNAPI only support HASHTABLE_LOOKUP with float32 output", - builtin); - return kTfLiteError; - } - nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP; - break; - case tflite::BuiltinOperator_STRIDED_SLICE: - add_strided_slice_params(node.builtin_data); - nn_op_type = ANEURALNETWORKS_STRIDED_SLICE; - break; - case tflite::BuiltinOperator_SPACE_TO_BATCH_ND: - nnapi_version = 11; // require NNAPI 1.1 - nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND; - break; - case tflite::BuiltinOperator_BATCH_TO_SPACE_ND: - nnapi_version = 11; // require NNAPI 1.1 - nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND; - check_batch_to_space_params(); - break; - case tflite::BuiltinOperator_CAST: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_CAST_EX, - 
static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_TOPK_V2: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_TOPK_V2_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_GATHER: - add_gather_ex_params(node.builtin_data); - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_GATHER_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_SPLIT: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_SPLIT_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_NEG: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_NEG_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_EXP: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_EXP_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_TRANSPOSE_CONV: - add_transpose_conv_params(node.builtin_data); - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_TRANSPOSE_CONV_EX, - 
static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_PRELU: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_PRELU_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_ARG_MAX: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_ARGMAX_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; -#ifndef OBS_BUILD - case tflite::BuiltinOperator_PACK: - add_pack_ex_params(node.builtin_data); - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_PACK_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_UNPACK: - add_unpack_ex_params(node.builtin_data); - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_UNPACK_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_SQRT: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_SQRT_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_RSQRT: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_RSQRT_EX, - 
static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_EQUAL: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_EQUAL_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_NOT_EQUAL: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_NOT_EQUAL_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_SUM: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_REDUCE_SUM_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_REDUCE_MAX: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_TENSORFLOW_MAX_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_REDUCE_MIN: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_REDUCE_MIN_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_LOGICAL_AND: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_LOGICAL_AND_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - 
static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - case tflite::BuiltinOperator_LOGICAL_OR: - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_LOGICAL_OR_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; -#endif - case tflite::BuiltinOperator_CONCAT_EMBEDDINGS: - case tflite::BuiltinOperator_LSH_PROJECTION: - case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: - case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: - case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: - case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: - case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: - //case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: - case tflite::BuiltinOperator_PADV2: - //case tflite::BuiltinOperator_RESIZE_BILINEAR: - case tflite::BuiltinOperator_CALL: - case tflite::BuiltinOperator_SKIP_GRAM: - //case tflite::BuiltinOperator_RELU_N1_TO_1: - //case tflite::BuiltinOperator_GATHER: - //case tflite::BuiltinOperator_SPACE_TO_BATCH_ND: - //case tflite::BuiltinOperator_BATCH_TO_SPACE_ND: - //case tflite::BuiltinOperator_TOPK_V2: - //case tflite::BuiltinOperator_SPLIT: - //case tflite::BuiltinOperator_STRIDED_SLICE: - //case tflite::BuiltinOperator_EXP: - case tflite::BuiltinOperator_LOG_SOFTMAX: - //case tflite::BuiltinOperator_DEQUANTIZE: - case tflite::BuiltinOperator_DELEGATE: - //case tflite::BuiltinOperator_CAST: - //case tflite::BuiltinOperator_PRELU: - case tflite::BuiltinOperator_MAXIMUM: - case tflite::BuiltinOperator_MINIMUM: -#ifndef OBS_BUILD - case tflite::BuiltinOperator_ARG_MIN: -#endif - case tflite::BuiltinOperator_GREATER: - case tflite::BuiltinOperator_GREATER_EQUAL: - case tflite::BuiltinOperator_LESS: - case tflite::BuiltinOperator_LESS_EQUAL: - //case tflite::BuiltinOperator_NEG: - case 
tflite::BuiltinOperator_SELECT: - case tflite::BuiltinOperator_SLICE: - case tflite::BuiltinOperator_SIN: - //case tflite::BuiltinOperator_LOG: - //case tflite::BuiltinOperator_TRANSPOSE_CONV: -#ifndef OBS_BUILD - case tflite::BuiltinOperator_TILE: - case tflite::BuiltinOperator_EXPAND_DIMS: - case tflite::BuiltinOperator_SPARSE_TO_DENSE: - //case tflite::BuiltinOperator_EQUAL: - //case tflite::BuiltinOperator_NOT_EQUAL: - //case tflite::BuiltinOperator_SUM: - //case tflite::BuiltinOperator_REDUCE_MAX: - //case tflite::BuiltinOperator_REDUCE_MIN: - case tflite::BuiltinOperator_REDUCE_PROD: - //case tflite::BuiltinOperator_SQRT: - //case tflite::BuiltinOperator_RSQRT: - case tflite::BuiltinOperator_SHAPE: - case tflite::BuiltinOperator_POW: - case tflite::BuiltinOperator_FAKE_QUANT: - //case tflite::BuiltinOperator_PACK: - //case tflite::BuiltinOperator_LOGICAL_OR: - case tflite::BuiltinOperator_ONE_HOT: - //case tflite::BuiltinOperator_LOGICAL_AND: - case tflite::BuiltinOperator_LOGICAL_NOT: - //case tflite::BuiltinOperator_UNPACK: - case tflite::BuiltinOperator_FLOOR_DIV: - case tflite::BuiltinOperator_REDUCE_ANY: - case tflite::BuiltinOperator_SQUARE: - case tflite::BuiltinOperator_ZEROS_LIKE: - case tflite::BuiltinOperator_FILL: -#endif - logError("Op code %d is currently not delegated to NNAPI", builtin); - return kTfLiteError; - break; - case tflite::BuiltinOperator_CUSTOM: { - std::string custom_name(registration.custom_name); - if (custom_name.compare("TensorFlowMax") == 0) { - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_TENSORFLOW_MAX_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - } - else if (custom_name.compare("SquaredDifference") == 0) { - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_SQUARED_DIFFERENCE_EX, - 
static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - } - else if (custom_name.compare("TensorFlowSum") == 0) { - CHECK_NN(ANeuralNetworksModel_addOperationEx( - nn_model, ANEURALNETWORKS_REDUCE_SUM_EX, - static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(node.outputs->size), - reinterpret_cast<uint32_t*>(node.outputs->data))); - continue; - } - logError("Custom operations are not supported when using NNAPI."); - return kTfLiteError; - break; - } -#ifdef OBS_BUILD - default: - logError("Op code %d is currently not delegated to NNAPI", builtin); - return kTfLiteError; - break; -#endif - } - - //if (nnapi_version == 11 && GetAndroidSdkVersionCached() < 28) { - // FATAL("Op %d needs NNAPI1.1", builtin); - //} - - // Add the operation. - RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_addOperation( - nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()), - augmented_inputs.data(), - static_cast<uint32_t>(augmented_outputs.size()), - reinterpret_cast<uint32_t*>(augmented_outputs.data()))); - } - return kTfLiteOk; -} - -TfLiteStatus NNAPIDelegate::BuildGraph(::tflite::Interpreter* interpreter) { - if (nn_model_ && nn_compiled_model_) return model_status_; - - // TODO(aselle): This is not correct. need to handle resize invalidation. - if (!nn_model_) { - CHECK_NN(ANeuralNetworksModel_create(&nn_model_)); - - // Find which tensors should be added to NNAPI. TFLite has temporaries - // and RNN back-edges which are are not valid for NNAPI. We look through all - // inputs and outputs and mark the mapping in tensor_id_to_nnapi_id with - // kOperandIdNotSet. addTensorOperands will replace those with the - // corresponding NNAPI operand ids and skip kOperandNotNeeded entries. 
- std::vector<int64_t> tensor_id_to_nnapi_id(interpreter->tensors_size(), - kOperandNotNeeded); - auto set_ids_to_not_set = [&tensor_id_to_nnapi_id](const int* buf, - size_t count) { - for (int j = 0; j < count; j++) { - auto tensor_id = buf[j]; - if (tensor_id != kOptionalTensor) { - tensor_id_to_nnapi_id[tensor_id] = kOperandIdNotSet; - } - } - }; - for (size_t i = 0; i < interpreter->nodes_size(); i++) { - const auto* node_and_registration = interpreter->node_and_registration(i); - const TfLiteNode& node = node_and_registration->first; - set_ids_to_not_set(node.inputs->data, node.inputs->size); - set_ids_to_not_set(node.outputs->data, node.outputs->size); - } - set_ids_to_not_set(interpreter->inputs().data(), - interpreter->inputs().size()); - set_ids_to_not_set(interpreter->outputs().data(), - interpreter->outputs().size()); - - uint32_t next_id = 0; - RETURN_ERROR_IF_TFLITE_FAILED(addTensorOperands( - interpreter, nn_model_, &next_id, &tensor_id_to_nnapi_id)); - RETURN_ERROR_IF_TFLITE_FAILED( - AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_, - &model_states_outputs_, tensor_id_to_nnapi_id)); - - std::vector<uint32_t> augmented_inputs; - MapAndAddTensorIds(interpreter->inputs().data(), - interpreter->inputs().size(), &augmented_inputs, - tensor_id_to_nnapi_id); - augmented_inputs.insert(augmented_inputs.end(), - model_states_inputs_.begin(), - model_states_inputs_.end()); - std::vector<uint32_t> augmented_outputs; - MapAndAddTensorIds(interpreter->outputs().data(), - interpreter->outputs().size(), &augmented_outputs, - tensor_id_to_nnapi_id); - MapAndAddTensorIds(model_states_outputs_.data(), - model_states_outputs_.size(), &augmented_outputs, - tensor_id_to_nnapi_id); - - CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs( - nn_model_, static_cast<uint32_t>(augmented_inputs.size()), - reinterpret_cast<const uint32_t*>(augmented_inputs.data()), - static_cast<uint32_t>(augmented_outputs.size()), - reinterpret_cast<const 
uint32_t*>(augmented_outputs.data()))); - - // TODO Support ANeuralNetworksModel_relaxComputationFloat32toFloat16 - //if (GetAndroidSdkVersionCached() >= 28) { - // CHECK_NN(ANeuralNetworksModel_relaxComputationFloat32toFloat16( - // nn_model_, interpreter->GetAllowFp16PrecisionForFp32())); - //} - CHECK_NN(ANeuralNetworksModel_finish(nn_model_)); - } - if (!nn_compiled_model_) { - CHECK_NN(ANeuralNetworksCompilation_create(nn_model_, &nn_compiled_model_)); - CHECK_NN(ANeuralNetworksCompilation_finish(nn_compiled_model_)); - } - return kTfLiteOk; -} - -#include <unordered_map> - -TfLiteStatus NNAPIDelegate::Invoke(::tflite::Interpreter* interpreter) { - if (!nn_model_) { - model_status_ = BuildGraph(interpreter); - if (model_status_ != kTfLiteOk) { - logError("Failed to build graph for NNAPI"); - } - } - if (model_status_ != kTfLiteOk) { - return model_status_; - } - - ANeuralNetworksExecution* execution = nullptr; - CHECK_NN(ANeuralNetworksExecution_create(nn_compiled_model_, &execution)); - - // Allocate temporary buffer to save casted boolean tensor - std::unordered_map<size_t, uint8_t*> input_boolean_tensors; - std::unordered_map<size_t, uint8_t*> output_boolean_tensors; - for (size_t i = 0; i < interpreter->inputs().size(); i++) - { - int input = interpreter->inputs()[i]; - TfLiteTensor* tensor = interpreter->tensor(input); - if (tensor->type == kTfLiteBool) - { - size_t elements = tensor->bytes / sizeof(bool); - uint8_t* temp_tensor = new uint8_t[tensor->bytes / sizeof(bool)]; - input_boolean_tensors[i] = temp_tensor; - for (size_t idx = 0; idx < elements; idx++) - { - temp_tensor[idx] = (tensor->data.b[idx] ? 
0x00 : 0xff); - } - } - } - for (size_t i = 0; i < interpreter->outputs().size(); i++) - { - int output = interpreter->outputs()[i]; - TfLiteTensor* tensor = interpreter->tensor(output); - if (tensor->type == kTfLiteBool) - { - uint8_t* temp_tensor = new uint8_t[tensor->bytes / sizeof(bool)]; - output_boolean_tensors[i] = temp_tensor; - } - } - - // Currently perform deep copy of input buffer - for (size_t i = 0; i < interpreter->inputs().size(); i++) { - int input = interpreter->inputs()[i]; - // TODO(aselle): Is this what we want or do we want input instead? - // TODO(aselle): This should be called setInputValue maybe to be cons. - TfLiteTensor* tensor = interpreter->tensor(input); - if (tensor->type == kTfLiteBool) - { - CHECK_NN(ANeuralNetworksExecution_setInput( - execution, i, nullptr, input_boolean_tensors[i], tensor->bytes * sizeof(uint8_t) / sizeof(bool))); - } - else - { - CHECK_NN(ANeuralNetworksExecution_setInput( - execution, i, nullptr, tensor->data.raw, tensor->bytes)); - } - } - - // Tell nn api where to place final data. - for (size_t i = 0; i < interpreter->outputs().size(); i++) { - int output = interpreter->outputs()[i]; - TfLiteTensor* tensor = interpreter->tensor(output); - - if (tensor->type == kTfLiteBool) - { - CHECK_NN(ANeuralNetworksExecution_setOutput( - execution, i, nullptr, output_boolean_tensors[i], tensor->bytes * sizeof(uint8_t) / sizeof(bool))); - } - else - { - CHECK_NN(ANeuralNetworksExecution_setOutput( - execution, i, nullptr, tensor->data.raw, tensor->bytes)); - } - } - - // The state_out of previous invocation need to be mapped to state_in of - // current invocation. - for (size_t i = 0; i < model_states_outputs_.size(); i++) { - int state_tensor_idx = model_states_outputs_[i]; - TfLiteTensor* tensor = interpreter->tensor(state_tensor_idx); - // Here we are using a deep copy for state_in tensors so that we are not - // reading and writing into the same buffer during a invocation. 
- // TODO(miaowang): using double shared buffer to minimize the copies. - CHECK_NN(ANeuralNetworksExecution_setInput( - execution, i + interpreter->inputs().size(), nullptr, tensor->data.raw, - tensor->bytes)); - // Tell NNAPI where to output the state_out. - CHECK_NN(ANeuralNetworksExecution_setOutput( - execution, i + interpreter->outputs().size(), nullptr, tensor->data.raw, - tensor->bytes)); - } - - // Currently use blocking compute. - ANeuralNetworksEvent* event = nullptr; - CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event)); - CHECK_NN(ANeuralNetworksEvent_wait(event)); - ANeuralNetworksEvent_free(event); - ANeuralNetworksExecution_free(execution); - - // Tell nn api where to place final data. - for (size_t i = 0; i < interpreter->inputs().size(); i++) { - int input = interpreter->inputs()[i]; - TfLiteTensor* tensor = interpreter->tensor(input); - - if (tensor->type == kTfLiteBool) - { - uint8_t* temp_tensor = input_boolean_tensors[i]; - input_boolean_tensors[i] = nullptr; - delete temp_tensor; - } - } - for (size_t i = 0; i < interpreter->outputs().size(); i++) { - int output = interpreter->outputs()[i]; - TfLiteTensor* tensor = interpreter->tensor(output); - - if (tensor->type == kTfLiteBool) - { - uint8_t* temp_tensor = output_boolean_tensors[i]; - size_t elements = tensor->bytes / sizeof(bool); - for (size_t idx = 0; idx < elements; idx++) - { - tensor->data.b[idx] = ((temp_tensor[idx] == 0x00) ? 
false : true); - } - output_boolean_tensors[i] = nullptr; - delete temp_tensor; - } - } - -#if 0 - printf("From the NN API:\n"); - TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]); - if (float* data = - interpreter->typed_tensor<float>(interpreter->outputs()[0])) { - size_t num = tensor->bytes / sizeof(float); - for (float* p = data; p < data + num; p++) { - printf(" %f", *p); - } - printf("\n"); - } -#endif - - return kTfLiteOk; -} - -bool NNAPIDelegate::IsSupported() { return nnfw::NNAPIExists(); } - -} // namespace tflite -} // namespace nnfw - -// clang-format on diff --git a/libs/tflite/src/ext/nnapi_delegate_ex_AddOpsAndParams_lambda.inc b/libs/tflite/src/ext/nnapi_delegate_ex_AddOpsAndParams_lambda.inc deleted file mode 100644 index a91e4de60..000000000 --- a/libs/tflite/src/ext/nnapi_delegate_ex_AddOpsAndParams_lambda.inc +++ /dev/null @@ -1,106 +0,0 @@ -// This file is included from AddOpsAndParams defined in nnapi_delegate.cc -// and contains lambda for extened implementation to original Tensorflow Lite. 
- auto add_resize_bilinear_params = [&add_scalar_int32, &interpreter, &augmented_inputs](void* data) { - auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(data); - if (builtin->align_corners) { - FATAL("Resize bilinear does not support align corners in NNAPI"); - } - - TfLiteTensor* tensor = interpreter->tensor(augmented_inputs.back()); - assert(tensor->type == kTfLiteInt32); - assert(tensor->bytes == sizeof(int)*2); - augmented_inputs.pop_back(); - - int height = ((int*)(tensor->data.raw))[1]; - int width = ((int*)(tensor->data.raw))[0]; - add_scalar_int32(height); - add_scalar_int32(width); - }; - - auto check_l2normalization_params = [interpreter, &node](void* data) { - auto builtin = reinterpret_cast<TfLiteL2NormParams*>(data); - if (builtin->activation != kTfLiteActNone) { - FATAL("NNAPI does not support L2Normalization with fused activations"); - } - if ((node.inputs->size > 0) && - (interpreter->tensor(node.inputs->data[0])->dims->size != 4)) { - FATAL("NNAPI only supports input rank 4 for L2Normalization"); - } - }; - - auto add_transpose_conv_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(data); - add_scalar_int32(builtin->padding); - add_scalar_int32(builtin->stride_width); - add_scalar_int32(builtin->stride_height); - }; - - auto add_lrn_params = [&add_scalar_int32, - &add_scalar_float32](void* data) { - auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(data); - add_scalar_int32(builtin->radius); - add_scalar_float32(builtin->bias); - add_scalar_float32(builtin->alpha); - add_scalar_float32(builtin->beta); - }; - - auto add_strided_slice_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(data); - add_scalar_int32(builtin->begin_mask); - add_scalar_int32(builtin->end_mask); - // ellipsis_mask and new_axis_mask are not supported on nn runtime - // cf) tflite interpreter supports both operations - if 
(builtin->ellipsis_mask) { - FATAL("STRIDE_SLICE does not support ellipsis_mask in NNAPI"); - } - if (builtin->new_axis_mask) { - FATAL("STRIDE_SLICE does not support new_axis_mask in NNAPI"); - } - add_scalar_int32(builtin->shrink_axis_mask); - }; - - auto add_gather_ex_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLiteGatherParams*>(data); - add_scalar_int32(builtin->axis); - if (builtin->axis != 0) { - FATAL("GATHER does not support axis>0 in NNAPI"); - } - }; - -#ifndef OBS_BUILD - auto add_pack_ex_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLitePackParams*>(data); - add_scalar_int32(builtin->values_count); - add_scalar_int32(builtin->axis); - }; - - auto add_unpack_ex_params = [&add_scalar_int32](void* data) { - auto builtin = reinterpret_cast<TfLiteUnpackParams*>(data); - add_scalar_int32(builtin->num); - add_scalar_int32(builtin->axis); - }; -#endif - - auto check_batch_to_space_params = [interpreter, &node, &augmented_inputs]() { - - //If there are 3 inputs, check if crops is having default values {0, 0, 0, 0} - //Else unsupported by NNAPI - - if(augmented_inputs.size() == 3) - { - const uint32_t crops_buffer_index = node.inputs->data[2]; - const TfLiteTensor* crops = interpreter->tensor(crops_buffer_index); - const int *crops_value = crops->data.i32; - - //Check if crops is having default values {0, 0, 0, 0} - if(crops_value[0] != 0 || crops_value[1] != 0 || crops_value[2] != 0 || crops_value[3] != 0) - { - FATAL("BATCH_TO_SPACE_ND does not support Explicit crops in NNAPI"); - } - else - { - //Restrict crops input and pass only other two inputs - augmented_inputs.pop_back(); - } - } - }; diff --git a/libs/tflite/src/interp/FlatBufferBuilder.cpp b/libs/tflite/src/interp/FlatBufferBuilder.cpp deleted file mode 100644 index 4b9cde719..000000000 --- a/libs/tflite/src/interp/FlatBufferBuilder.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "tflite/interp/FlatBufferBuilder.h" - -#include "tflite/ext/kernels/register.h" - -namespace nnfw -{ -namespace tflite -{ - -std::unique_ptr<::tflite::Interpreter> FlatBufferBuilder::build(void) const -{ - std::unique_ptr<::tflite::Interpreter> interpreter; - - nnfw::tflite::BuiltinOpResolver resolver; - - ::tflite::InterpreterBuilder builder(_model, resolver); - - builder(&interpreter); - - return std::move(interpreter); -} - -} // namespace tflite -} // namespace nnfw diff --git a/libs/tflite/src/interp/FunctionBuilder.cpp b/libs/tflite/src/interp/FunctionBuilder.cpp deleted file mode 100644 index eab940c18..000000000 --- a/libs/tflite/src/interp/FunctionBuilder.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/interp/FunctionBuilder.h" - -namespace nnfw -{ -namespace tflite -{ - -std::unique_ptr<::tflite::Interpreter> FunctionBuilder::build(void) const -{ - auto res = std::unique_ptr<::tflite::Interpreter>{new ::tflite::Interpreter}; - - _fn(*res); - - return std::move(res); -} - -} // namespace tflite -} // namespace nnfw |