diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-07-30 11:40:16 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-07-30 11:40:16 +0900 |
commit | 9e45ab56bd165609118989c0d1bec309c3754560 (patch) | |
tree | 4979e8674abc7d21a6471770c1355e0e6c0e8a3f /compute/ARMComputeEx | |
parent | 05e0ec30a632339a8533082476f27bda31ccde16 (diff) | |
download | nnfw-9e45ab56bd165609118989c0d1bec309c3754560.tar.gz nnfw-9e45ab56bd165609118989c0d1bec309c3754560.tar.bz2 nnfw-9e45ab56bd165609118989c0d1bec309c3754560.zip |
patch for rebase master on release/1.7.0submit/tizen/20200731.060745submit/tizen/20200730.075407submit/tizen/20200730.023729accepted/tizen/unified/20200803.122223tizen_6_m1_backup
Change-Id: Id38b617d325ef7e854995a47f032bdf482a779b3
Diffstat (limited to 'compute/ARMComputeEx')
113 files changed, 1170 insertions, 10246 deletions
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h deleted file mode 100644 index 9699b5c00..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLArgOperationKernel.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLArgOperationKernel.h - * @brief This file defines CLArgOperationKernel - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ -#define __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define interface for the argop kernel. - */ -class CLArgOperationKernel : public ICLKernel -{ -public: - /** - * @brief Default constructor. - */ - CLArgOperationKernel(); - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied - */ - CLArgOperationKernel(const CLArgOperationKernel &) = delete; - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied - * @return Reference of this instance - */ - CLArgOperationKernel &operator=(const CLArgOperationKernel &) = delete; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved - */ - CLArgOperationKernel(CLArgOperationKernel &&) = default; - /** - * @brief Allow instances of this class to be moved - * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved - * @return Reference of this instance - */ - CLArgOperationKernel &operator=(CLArgOperationKernel &&) = default; - /** - * @brief Initialise the kernel's input, output and border mode. - * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[out] output The output tensor, Data types supported: S32. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[in] op Arg operation to perform. - * return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, ArgOperation op); - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLArgOperationKernel - * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32. - * @param[in] output The output tensor info, Data types supported: S32. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[in] op Arg operation to perform. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis, - ArgOperation op); - - /* - * @brief Run CLArgOperationKernel op - * @param[in] window Window to be used for in_slice - * @param[in] queue cl::CommandQueue - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - uint32_t _axis; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h deleted file mode 100644 index b0357fe99..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLCastKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLCastKernel class - */ - -#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__ -#define __ARM_COMPUTE_CLCASTKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to define OpenCL kernel for cast operation - */ -class CLCastKernel : public ICLKernel -{ -public: - /** - * @brief Construct CLCastKernel object - */ - CLCastKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLCastKernel(const CLCastKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLCastKernel &operator=(const CLCastKernel &) = delete; - - /** - * @brief Construct CLCastKernel object using default move constructor - * @param[in] CLCastKernel object to move - */ - CLCastKernel(CLCastKernel &&) = default; - - /** - * @brief Allow instances of this class to be moved - * @param[in] CLCastKernel object to move - */ - CLCastKernel &operator=(CLCastKernel &&) = default; - - /** - * @brief Destruct this CLCastKernel object - */ - ~CLCastKernel() = default; - - /** - * @brief Initialise the kernel's input and output. - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] input_subtype Sub data type of input. - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, SubDataType input_subtype); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h deleted file mode 100644 index 8615cf120..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ -#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform depthTospace operation */ -class CLDepthToSpaceKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthToSpaceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default; - /** Default destructor */ - ~CLDepthToSpaceKernel() = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h deleted file mode 100644 index 9321c3677..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__ -#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices - * - * @note This kernel should be used ONLY for Midgard architectures - * - * This kernel performs the following computation: - * - * -# Convert a values from int8 to int32 - * -# Convert b values from int8 to int32 - * -# Compute the int32 matrix product of the resulting a * b and store the result as int32 - * - */ -class CLGEMMLowpMatrixMultiplyKernelEx : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMLowpMatrixMultiplyKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyKernelEx(const CLGEMMLowpMatrixMultiplyKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyKernelEx &operator=(const CLGEMMLowpMatrixMultiplyKernelEx &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyKernelEx(CLGEMMLowpMatrixMultiplyKernelEx &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyKernelEx &operator=(CLGEMMLowpMatrixMultiplyKernelEx &&) = default; - /** Initialise the kernel's input and output. - * - * @note This kernel should be used ONLY for Midgard architectures - * - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8 - * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p - * input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type - * supported: S32 - * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of - * the input matrices - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, - const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLGEMMLowpMatrixMultiplyKernelEx - * - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8 - * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p - * input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type - * supported: S32 - * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of - * the input matrices - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, - const ITensorInfo *output, - const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__*/ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h deleted file mode 100644 index dd2dbf6a4..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__ -#define __ARM_COMPUTE_CLPRELU_KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to calculate PReLU*/ -class CLPReLUKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPReLUKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLPReLUKernel(const CLPReLUKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLPReLUKernel &operator=(const CLPReLUKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPReLUKernel(CLPReLUKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPReLUKernel &operator=(CLPReLUKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor1. - * @param[in] alpha Source tensor2. - * @param[out] output Output tensor. - */ - void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - const ICLTensor *_alpha; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h deleted file mode 100644 index 4c0a82ce1..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ -#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform spaceTodepth operation */ -class CLSpaceToDepthKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSpaceToDepthKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default; - /** Default destructor */ - ~CLSpaceToDepthKernel() = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h deleted file mode 100644 index 9d174deb5..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ -#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL. - */ -class CLTransposeConvLayerUpsampleKernel : public ICLKernel -{ -public: - /** Constructor */ - CLTransposeConvLayerUpsampleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayerUpsampleKernel & - operator=(const CLTransposeConvLayerUpsampleKernel &) = delete; - /** Default Move Constructor. */ - CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default; - /** Default move assignment operator */ - CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default; - /** Default destructor */ - ~CLTransposeConvLayerUpsampleKernel() = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. All but - * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only - * performed within the XY-plane. - * @param[in] inner_border Top and right inner border sizes. These rows and columns will be - * filled with zero. - * @param[in] info Contains padding and stride information described in @ref - * PadStrideInfo. - */ - void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, - const PadStrideInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLTransposeConvLayerUpsample - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data types supported: same as @p input. All - * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is - * only performed within the XY-plane. - * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled - * with zero. - * @param[in] info Contains padding and stride information described in @ref - * PadStrideInfo. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const BorderSize &inner_border, const PadStrideInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - BorderSize _inner_border; - PadStrideInfo _info; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h b/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h deleted file mode 100644 index d4c9c610a..000000000 --- a/compute/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ -#define __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ - -#include "arm_compute/core/CPP/ICPPKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** CPP kernel to perform tensor upsample. - * - */ -class CPPUpsampleKernelEx : public ICPPKernel -{ -public: - const char *name() const override { return "CPPUpsampleKernelEx"; } - /** Default constructor */ - CPPUpsampleKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPUpsampleKernelEx(const CPPUpsampleKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPUpsampleKernelEx &operator=(const CPPUpsampleKernelEx &) = delete; - /** Allow instances of this class to be moved */ - CPPUpsampleKernelEx(CPPUpsampleKernelEx &&) = default; - /** Allow instances of this class to be moved */ - CPPUpsampleKernelEx &operator=(CPPUpsampleKernelEx &&) = default; - /** Default destructor */ - ~CPPUpsampleKernelEx() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8 - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] info Padding info. - */ - void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const ITensor *_input; - ITensor *_output; - PadStrideInfo _info; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h deleted file mode 100644 index 4e9f097c2..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NECastKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NECASTKERNEL_H__ -#define __ARM_COMPUTE_NECASTKERNEL_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the cast layer kernel. */ -class NECastKernel : public INEKernel -{ -public: - const char *name() const override { return "NECastKernel"; } - /** Default constructor */ - NECastKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECastKernel(const NECastKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECastKernel &operator=(const NECastKernel &) = delete; - /** Default Move Constructor. */ - NECastKernel(NECastKernel &&) = default; - /** Default move assignment operator */ - NECastKernel &operator=(NECastKernel &&) = default; - /** Default destructor */ - ~NECastKernel() = default; - /** Set input, output tensors. - * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[out] output Destination tensor with the same dimensions of input. Data type supported: - * U8/S8/QASYMM8/U32/S32/F32. - * @param[in] input_subtype Sub data type of input. - */ - void configure(const ITensor *input, ITensor *output, SubDataType input_subtype); - /** Static function to check if given info will lead to a valid configuration of @ref NECastKernel - * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[in] input_subtype Sub data type of input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - SubDataType input_subtype); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - SubDataType _input_subtype; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NECASTKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h deleted file mode 100644 index b62897e68..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__ -#define __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the depth to space kernel */ -class NEDepthToSpaceLayerKernelEx : public INEKernel -{ -public: - const char *name() const override { return "NEDepthToSpaceLayerKernelEx"; } - /** Default constructor */ - NEDepthToSpaceLayerKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthToSpaceLayerKernelEx(const NEDepthToSpaceLayerKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthToSpaceLayerKernelEx &operator=(const NEDepthToSpaceLayerKernelEx &) = delete; - /** Allow instances of this class to be moved */ - NEDepthToSpaceLayerKernelEx(NEDepthToSpaceLayerKernelEx &&) = default; - /** Allow instances of this class to be moved */ - NEDepthToSpaceLayerKernelEx &operator=(NEDepthToSpaceLayerKernelEx &&) = default; - /** Default destructor */ - ~NEDepthToSpaceLayerKernelEx() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape x value. - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEDepthToSpaceLayerKernelEx. - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h deleted file mode 100644 index 57de78dd8..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ -#define __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for an element-wise unary operation kernel - * - * Element-wise operation is computed by: - * @f[ output(x) = OP(input(x))@f] - * - */ -class NEElementwiseUnaryKernelEx : public INEKernel -{ -public: - const char *name() const override { return "NEElementwiseUnaryKernelEx"; } - /** Default constructor */ - NEElementwiseUnaryKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseUnaryKernelEx(const NEElementwiseUnaryKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseUnaryKernelEx &operator=(const NEElementwiseUnaryKernelEx &) = delete; - /** Allow instances of this class to be moved */ - NEElementwiseUnaryKernelEx(NEElementwiseUnaryKernelEx &&) = default; - /** Allow instances of this class to be moved */ - NEElementwiseUnaryKernelEx &operator=(NEElementwiseUnaryKernelEx &&) = default; - /** Default destructor */ - ~NEElementwiseUnaryKernelEx() = default; - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEElementwiseUnaryKernelEx - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input. Data types supported: F16/F32/S32. - * @param[in] output Output tensor. Data types supported: Same as @p input. - */ - void configure(ElementWiseUnaryEx op, const ITensor *input, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEElementwiseUnaryKernelEx - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a Status - */ - static Status validate(ElementWiseUnaryEx op, const ITensorInfo *input, - const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - /** Common signature for all the specialised arithmetic functions - * - * @param[in] input An input tensor. Data types supported: F16/F32/S32. - * @param[out] output The output tensor. Data types supported: Same as @p input. - * @param[in] window Region on which to execute the kernel. - */ - using ElementwiseUnaryFunction = void(const ITensor *input, ITensor *output, - const Window &window); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output); - - /** Function to use for the particular tensor types passed to configure() */ - std::function<void(const ITensor *input, ITensor *output, const Window &window)> _function; - - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h deleted file mode 100644 index 722efd3d0..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEPReLUKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEPRELUKERNEL_H__ -#define __ARM_COMPUTE_NEPRELUKERNEL_H__ - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform Parametric Rectified Linear Unit - * - * Result is computed by: - * @f[ output(x) = alpha * x for x < 0, output(x) = x for x >= 0 @f] - */ -class NEPReLUKernel : public INEKernel -{ -public: - const char *name() const override { return "NEPReLUKernel"; } - /** Default constructor */ - NEPReLUKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPReLUKernel(const NEPReLUKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPReLUKernel &operator=(const NEPReLUKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPReLUKernel(NEPReLUKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPReLUKernel &operator=(NEPReLUKernel &&) = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data type supported: QASYMM8/F32 - * @param[in] alpha Alpha tensor. Data types supported: Same as @p input - * @param[out] output Output tensor. Data types supported: Same as @p input - */ - void configure(const ITensor *input, const ITensor *alpha, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEPReLUKernel.h - * - * @param[in] input Input tensor input info. Data types supported: QASYMM8/F32. - * @param[in] alpha Alpha tensor input info. Data types supported: Same as @p input. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, - const ITensorInfo *output); - static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &alpha, - const ITensorInfo &output); - -private: - const ITensor *_input; /**< Source tensor */ - const ITensor *_alpha; /**< Alpha tensor */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEPRELUKERNEL_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h deleted file mode 100644 index 0ffcf6be8..000000000 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__ -#define __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the space to depth kernel */ -class NESpaceToDepthLayerKernelEx : public INEKernel -{ -public: - const char *name() const override { return "NESpaceToDepthLayerKernelEx"; } - /** Default constructor */ - NESpaceToDepthLayerKernelEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToDepthLayerKernelEx(const NESpaceToDepthLayerKernelEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToDepthLayerKernelEx &operator=(const NESpaceToDepthLayerKernelEx &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToDepthLayerKernelEx(NESpaceToDepthLayerKernelEx &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToDepthLayerKernelEx &operator=(NESpaceToDepthLayerKernelEx &&) = default; - /** Default destructor */ - ~NESpaceToDepthLayerKernelEx() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref - * NESpaceToDepthLayerKernelEx - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h index 97bc4cea5..cfbd13436 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h @@ -16,25 +16,14 @@ #ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__ #define __ARM_COMPUTE_CLFUNCTIONSEX_H__ -#include <arm_compute/runtime/CL/functions/CLArgOperation.h> -#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h> #include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h> -#include <arm_compute/runtime/CL/functions/CLCast.h> -#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h> #include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h> #include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h> #include <arm_compute/runtime/CL/functions/CLGatherEx.h> #include <arm_compute/runtime/CL/functions/CLHashtableLookup.h> #include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h> -#include <arm_compute/runtime/CL/functions/CLLogicalNot.h> #include <arm_compute/runtime/CL/functions/CLNeg.h> -#include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h> -#include <arm_compute/runtime/CL/functions/CLPReLU.h> #include <arm_compute/runtime/CL/functions/CLReduceOperation.h> -#include <arm_compute/runtime/CL/functions/CLRNNLayerEx.h> -#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h> -#include <arm_compute/runtime/CL/functions/CLSplit.h> -#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h> #include <arm_compute/runtime/CL/functions/CLTopKV2.h> #include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h> diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h deleted file mode 100644 index c37096f7c..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLArgOperation.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLArgOperation class - */ - -#ifndef __ARM_COMPUTE_CLARGOPERATION_H__ -#define __ARM_COMPUTE_CLARGOPERATION_H__ - -#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to execute CLArgOperation operation - */ -class CLArgOperation : public IFunction -{ -public: - /** - * @brief Construct a new CLArgOperation object - */ - CLArgOperation(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLArgOperation(const CLArgOperation &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLArgOperation &operator=(const CLArgOperation &) = delete; - - /** - * @brief Construct a new CLArgOperation object by using copy constructor - * @param[in] CLArgOperation object to move - */ - CLArgOperation(CLArgOperation &&) = default; - - /** - * @brief Assign a CLArgOperation object. - * @param[in] CLArgOperation object to assign. This object will be moved. - */ - CLArgOperation &operator=(CLArgOperation &&) = default; - - /** - * @brief Initialise the kernel's inputs and outputs. - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[out] output The result of arg operation. Data types supported: S32. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[in] op Arg operation to perform. - * @return N/A - */ - void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, ArgOperation op); - - /** - * @brief Static function to check if given info will lead to a valid configuration - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[out] output The result of arg operation. Data types supported: S32. - * @param[in] op Arg operation to perform. - * @return a status - */ - static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &axis, - const ITensorInfo *output, ArgOperation op); - /** - * @brief Run the OpenCL kernel for this operation - * @return N/A - */ - void run() override; - -private: - ICLTensor *_input{nullptr}; - ICLTensor *_output{nullptr}; - std::vector<uint32_t> _axis{}; - ArgOperation _arg_op{ArgOperation::MAX}; - - std::unique_ptr<CLTensor[]> _interm_tensors{nullptr}; - std::unique_ptr<CLArgOperationKernel[]> _argop_kernels{nullptr}; - size_t _num_of_kernels{0}; -}; -} -#endif /*__ARM_COMPUTE_CLARGOPERATION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h deleted file mode 100644 index eed5cb8a4..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ -#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLBatchToSpaceNDKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLBatchToSpaceND : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] block_size A pointer to an array of integer values specifying block sizes - * for spatial dimension. - */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size); -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h deleted file mode 100644 index ebe0d8a1c..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLCast.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLCast class - */ - -#ifndef __ARM_COMPUTE_CLCAST_H__ -#define __ARM_COMPUTE_CLCAST_H__ - -#include "arm_compute/core/TypesEx.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLCastKernel. - * This converts the input tensor to the tensor of the output tensor's type. - */ -class CLCast : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's input and output - * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] input_subtype Sub data type of input. - */ - void configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype); -}; -} -#endif /* __ARM_COMPUTE_CLCAST_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h deleted file mode 100644 index d52a538df..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__ -#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLDepthToSpaceKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLDepthToSpace : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[block_size] block size integer only - */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size); -}; -} // namesace arm_compute - -#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h new file mode 100644 index 000000000..409eaf593 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2019-2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ +#define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ + +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h" +#include "arm_compute/runtime/CL/functions/CLReverse.h" +#include "arm_compute/runtime/CL/functions/CLTranspose.h" + +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" + +#include <memory> + +namespace arm_compute +{ +class ICLTensor; +/** Function to run the deconvolution layer. + * + * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input + * depending on the stride and pad info and then perform a 1x1 + * convolution pass. Input stride defines how many zeroes we should put between each element of the + * input and pad is the amount of padding. + * + * The relation between input to output is as follows: + * \f[ + * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x + * \f] + * \f[ + * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y + * \f] + * + * where: + * width_input is the size of the first input dimension. + * height_input is the size of the second input dimension. + * width_output is the size of the first output dimension. + * height_output is the size of the second output dimension. + * kernel_x and kernel_y are the convolution sizes in x and y. + * stride_x and stride_y is the input stride of the first and second dimension. + * + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. + * Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse. + * + * This function calls the following OpenCL kernels/functions: + * + * -# @ref CLDeconvolutionLayerUpsample + * -# @ref CLConvolutionLayer + * + * And the following CPP kernels: + * -# @ref CLReverse + * + */ +class CLDirectTransposeConvLayer : public IFunction +{ +public: + /** Constructor */ + CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete; + /** Default move constructor */ + CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete; + /** Default move assignment operator */ + CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default; + /** Set the input, weights, biases and output tensors. + * + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an + * optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type + * supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, except for + * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. The output has the same number of dimensions as the + * @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this + * is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * + */ + void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); + /** Set the input, weights, biases and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and + * an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data + * type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, except for + * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. The output has the same number of dimensions as + * the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref + * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref + * CLWeightsReshapeKernel. + * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info, + unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLDirectTransposeConvLayer + * + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an + * optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data + * type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, except for input + * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[in] output Output tensor info. The output has the same number of dimensions as the + * @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this is + * decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info, + unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + MemoryGroup _memory_group; + CLDeconvolutionLayerUpsample _scale_f; + CLConvolutionLayer _conv_f; + CLReverse _flip_weights; + + CLTensor _scaled_output; + ICLTensor *_original_weights; + CLTensor _weights_flipped; + CLTensor _flip_axis; + + bool _is_prepared; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h index 1a0284a3e..f3266f688 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h @@ -50,7 +50,7 @@ #include "arm_compute/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h" +#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" namespace arm_compute { @@ -168,7 +168,7 @@ private: CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel; CLScaleFactorSymm8Kernel _scale_factor_kernel; CLQuantizationSymmetricKernel _quant_input_kernel; - CLGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp; + CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; CLMultiplyScaleFactorKernel _multiply_scale_kernel; CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to // add bias in diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h deleted file mode 100644 index 68aba74ab..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ -#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ - -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/MemoryGroup.h" - -namespace arm_compute -{ -class IMemoryManager; -class ICLTensor; - -/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the - * following OpenCL kernels: - * - * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of - * GEMMInfo is FALSE) - * -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0) - * -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0) - * -*/ -class CLGEMMLowpMatrixMultiplyCoreEx : public IFunction -{ -public: - /** Constructor */ - CLGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyCoreEx(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete; - /** Default move constructor */ - CLGEMMLowpMatrixMultiplyCoreEx(CLGEMMLowpMatrixMultiplyCoreEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyCoreEx &operator=(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete; - /** Default move assignment operator */ - CLGEMMLowpMatrixMultiplyCoreEx &operator=(CLGEMMLowpMatrixMultiplyCoreEx &&) = default; - /** Initialise the kernel's inputs, output - * - * @note GEMMLowp: low precision GEMM kernel. [A * B + C] - * This kernel performs the following computations: - * - * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them. - * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them. - * -# Compute the matrix product of the resulting a * b in int32. - * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE - * - * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8. - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: - * S32 - * @param[out] output Output tensor. Data type supported: S32 or QASYMM8 if - * gemm_info.gemmlowp_output_stage != NONE - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped - * and - * if the reshape of matrix B should be executed only for the first run - */ - void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, - const GEMMInfo &gemm_info = GEMMInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLGEMMLowpMatrixMultiplyCoreEx - * - * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8. - * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type - * supported: S32 - * @param[in] output Output tensor info. Data type supported: S32 or QASYMM8 if - * gemm_info.gemmlowp_output_stage != NONE - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped - * and - * if the reshape of matrix B should be executed only for the first run - * - * @return a status - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, - const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo()); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - - // Kernels used - CLGEMMLowpMatrixMultiplyKernelEx _mm_midgard_kernel; - CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - - // Temporary tensors - CLTensor _vector_sum_col; - CLTensor _vector_sum_row; - - int32_t _a_offset; - int32_t _b_offset; - bool _reshape_b_only_on_first_run; - bool _is_prepared; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h deleted file mode 100644 index 51216715f..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLLOGICALNOT_H__ -#define __ARM_COMPUTE_CLLOGICALNOT_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLLogicalNot : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input Source tensor. Data types supported: QASYMM8. - * @param[out] output Output tensor. Data types supported: QASYMM8. - */ - void configure(ICLTensor *input, ICLTensor *output); -}; - -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLLOGICALNOT_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h deleted file mode 100644 index 7fbe558ff..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLPRELU_H__ -#define __ARM_COMPUTE_CLPRELU_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLPReLU : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input. Data types supported: - * QASYMM8/F16/F32. - * @param[in] alpha. Data types supported: - * QASYMM8/F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - */ - void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output); -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLPRELU_H__*/ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h deleted file mode 100644 index e83fb01cd..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLPixelWiseDivision.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLPixelWiseDivision class - */ -#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__ -#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLPixelWiseDivisionKernel. - */ -class CLPixelWiseDivision : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's inputs, output and convertion policy. - * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32 - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or - * 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest - * even. - * @return N/A - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f, - ConvertPolicy overflow_policy = ConvertPolicy::WRAP, - RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLPixelWiseDivision - * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale = 1.f, - ConvertPolicy overflow_policy = ConvertPolicy::WRAP, - RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO); -}; -} -#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h deleted file mode 100644 index b49cbd873..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLRNN_LAYER_EX_H__ -#define __ARM_COMPUTE_CLRNN_LAYER_EX_H__ - -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" -#include "arm_compute/runtime/CL/functions/CLGEMM.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLRNNLayerEx */ -class CLRNNLayerEx : public IFunction -{ -public: - /** Default constructor */ - CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialize the function - * - * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data - * types supported: F16/F32 - * @param[in] weights Weights tensor of shape [input_size, num_units] that - * multiplies the input. Data types supported: Same as @p input - * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies - * the current 'state'. Data types supported: Same as @p input - * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same - * as @p input - * @param[out] output Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] info Activation layer parameter. - */ - void configure(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state, - ICLTensor *output, ActivationLayerInfo &info); - /** Initialize the function - * - * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data - * types supported: F16/F32 - * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies - * the input. Data types supported: Same as @p input - * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the - * current 'state'. Data types supported: Same as @p input - * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p - * input - * @param[in] output Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] info Activation layer parameter. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *recurrent_weights, const ITensorInfo *bias, - const ITensorInfo *hidden_state, const ITensorInfo *output, - const ActivationLayerInfo &info); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - CLGEMM _gemm_state_f; - CLSaturatedArithmeticOperationKernel _add_kernel; - CLActivationLayerKernel _activation_kernel; - CLFullyConnectedLayer _fully_connected_kernel; - CLCopyKernel _copy_kernel; - CLTensor _fully_connected_out; - CLTensor _gemm_output; - CLTensor _add_output; - bool _is_prepared; -}; -} -#endif /* __ARM_COMPUTE_CLRNN_LAYER_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h deleted file mode 100644 index 2090b46fa..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__ -#define __ARM_COMPUTE_CLSPACETODEPTH_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLSpaceToDepthKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLSpaceToDepth : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[block_size] block size integer only - */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size); -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h deleted file mode 100644 index 03edd15e6..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLStridedSlice.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLStridedSlice and arm_compute::CLStridedSliceCPU class - */ - -#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ -#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLStridedSliceKernel - */ -class CLStridedSliceEx : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's inputs and outputs - * @param[in] input Tensor input. Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] beginData 'begin' vector of strided slice operation - * @param[in] endData 'end' vector of strided slice operation - * @param[in] stridesData 'strides' vector of strided slice operation - * @param[in] beginMask If the ith bit is set, begin[i] is ignored - * @param[in] endMask If the ith bit is set, end[i] is ignored - * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the - * dimensionality by 1, taking on the value at index begin[i] - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask); -}; -} -#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h index 54a697e69..5fb102e47 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,16 +37,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ #define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ -#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h" - -#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" - -#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -54,119 +49,102 @@ namespace arm_compute { -class ICLTensor; -/** Function to run the transpose convolution layer. - * - * @note This layer was copied in order to fix a bug computing to wrong output dimensions. - * - * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input - * depending on the stride and pad info and then perform a 1x1 - * convolution pass. Input stride defines how many zeroes we should put between each element of the - * input, pad is the amount of padding and finally a is a user - * specified value where a < stride - 1, that increases the padding top and right of the input - * image. - * - * The relation between input to output is as follows: - * \f[ - * width\_output = (width\_input - 1) \cdot stride\_x - \cdot padding\_x + kernel\_x - * \f] - * \f[ - * height\_output = (height\_input - 1) \cdot stride\_y - \cdot padding\_y + kernel\_y - * \f] - * - * where: - * width_input is the size of the first input dimension. - * height_input is the size of the second input dimension. - * width_output is the size of the first output dimension. - * height_output is the size of the second output dimension. - * kernel_x and kernel_y are the convolution sizes in x and y. - * stride_x and stride_y is the input stride of the first and second dimension. - * - * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. - * Therefore, it will be necessary to use the weights in the - * reverse order to perform an actual convolution. This is achieved by using the @ref - * CPPFlipWeightsKernel. - * - * This function calls the following OpenCL kernels/functions: - * - * -# @ref CLTransposeConvLayerUpsample - * -# @ref CLConvolutionLayer +/** Basic function to compute the deconvolution layer. This function calls the following OpenCL + * kernels/functions: * + * -# @ref CLGEMMDeconvolutionLayer + * -# @ref CLDirectTransposeConvLayer */ class CLTransposeConvLayer : public IFunction { public: - /** Constructor */ + /** Default constructor */ CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayer(const CLTransposeConvLayer &) = delete; - /** Default move constructor */ - CLTransposeConvLayer(CLTransposeConvLayer &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete; - /** Default move assignment operator */ - CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default; + /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, - * and an optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. - * Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: - * Same as @p input. - * @param[out] output Output tensor. The output has the same number of dimensions - * as the @p input. - * @param[in] info Contains padding and policies to be used in the - * transpose convolution, this is decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to top edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been - * reshaped with @ref CLWeightsReshapeKernel. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an + * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type + * supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same + * as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions as the + * @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this + * is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, - const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom, + const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); + /** Set the input, weights, biases and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and + * an optional 4th dimension for batch of inputs. Data types supported: + * QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data + * type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: + * Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions as + * the @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref + * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref + * CLWeightsReshapeKernel. + * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info, + unsigned int invalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLTransposeConvLayer + * CLTransposeConvLayer + * + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an + * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data + * type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as + * @p input. + * @param[in] output Output tensor info. The output has the same number of dimensions as the + * @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is + * described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, - * and an optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. - * Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: - * Same as @p input. - * @param[in] output Output tensor info. The output has the same number of dimensions - * as the @p input. - * @param[in] info Contains padding and policies to be used in the - * transpose convolution, this is decribed in @ref PadStrideInfo. - * @param[in] innvalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to top edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info, - unsigned int innvalid_right, unsigned int invalid_bottom, + const ITensorInfo *bias, ITensorInfo *output, + const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); + static DeconvolutionMethod + get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *bias, ITensorInfo *output, + const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info); // Inherited methods overridden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - CLTransposeConvLayerUpsample _scale_f; - CLConvolutionLayer _conv_f; - CPPFlipWeightsKernel _flip_weights; - CLTensor _scaled_output; - ICLTensor *_original_weights; - CLTensor _weights_flipped; - bool _is_prepared; + std::shared_ptr<IMemoryManager> _memory_manager; + std::unique_ptr<IFunction> _function; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h deleted file mode 100644 index 7570fe76d..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ -#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */ -class CLTransposeConvLayerUpsample : public IFunction -{ -public: - /** Default constructor */ - CLTransposeConvLayerUpsample(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete; - /** Allow instances of this class to be moved */ - CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default; - /** Allow instances of this class to be moved */ - CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default; - /** Default destructor */ - virtual ~CLTransposeConvLayerUpsample() = default; - - /** Initialize the function's source, destination, interpolation type and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] inner_border The number of zeros added to right and top edges of the input. - * @param[in] info Contains padding and policies to be used in the deconvolution. - */ - void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, - const PadStrideInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLTransposeConvLayerUpsample - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input. - * @param[in] inner_border The number of zeros added to right and top edges of the input. - * @param[in] info Contains padding and policies to be used in the deconvolution. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const BorderSize &inner_border, const PadStrideInfo &info); - - // Inherited methods overridden: - void run() override; - -private: - CLTransposeConvLayerUpsampleKernel _upsample; - ICLTensor *_output; -}; -} -#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h b/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h deleted file mode 100644 index 666afef4b..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ -#define __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ - -#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref CPPUpsample */ -class CPPUpsampleEx : public ICPPSimpleFunction -{ -public: - /** Configure the upsample CPP kernel - * - * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8 - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] info Padding information - */ - void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info); -}; -} -#endif /* __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h index 49504fde3..3fad230f1 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h @@ -18,20 +18,13 @@ #include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h> #include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h> -#include <arm_compute/runtime/NEON/functions/NECast.h> -#include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h> #include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h> #include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h> #include <arm_compute/runtime/NEON/functions/NEGatherEx.h> #include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h> #include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h> -#include <arm_compute/runtime/NEON/functions/NEPReLU.h> -#include <arm_compute/runtime/NEON/functions/NEReduceMeanEx.h> #include <arm_compute/runtime/NEON/functions/NEReduceSum.h> -#include <arm_compute/runtime/NEON/functions/NERNNLayerEx.h> #include <arm_compute/runtime/NEON/functions/NEReduceOperation.h> -#include <arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h> -#include <arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h> #include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h> #endif // __ARM_COMPUTE_NEFUNCTIONSEX_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h deleted file mode 100644 index f0f0d8114..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NECAST_H__ -#define __ARM_COMPUTE_NECAST_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Types.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to run @ref NECastKernel that converts an input tensor to the other types */ -class NECast : public INESimpleFunctionNoBorder -{ -public: - /** Configure the kernel. - * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[out] output Destination tensor with the same dimensions of input. Data type supported: - * U8/S8/QASYMM8/U32/S32/F32. - * @param[in] input_subtype Sub data type of input. - */ - void configure(const ITensor *input, ITensor *output, - SubDataType input_subtype = SubDataType::NONE); - /** Static function to check if given info will lead to a valid configuration of @ref NECast - * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[in] output Output tensor info. Data type supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[in] input_subtype Sub data type of input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - SubDataType input_subtype = SubDataType::NONE); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NECAST_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h deleted file mode 100644 index 005d85add..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ -#define __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEDepthToSpaceLayerKernelEx. */ -class NEDepthToSpaceLayerEx : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEDepthToSpaceLayerEx. - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape x value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h deleted file mode 100644 index 27a38e982..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ -#define __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform negative on an input tensor. */ -class NENegLayer : public INESimpleFunction -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32/S32. - * @param[out] output Output tensor. Data types supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h index 39c57eb70..56548a479 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h @@ -46,7 +46,7 @@ #include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h" #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/runtime/Tensor.h" @@ -164,7 +164,7 @@ private: MemoryGroup _memory_group; NEFullyConnectedHybridLayerReshapeWeights _reshape_weights_function; NEQuantizationSymmetricKernel _quant_input_kernel; - NEGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp; + NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; NEMultiplyScaleFactorKernel _multiply_scale_kernel; NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; Tensor _reshape_weights_output; diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h deleted file mode 100644 index d844513c9..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ -#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -// #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following - * NEON kernels if the DOT product instruction is not available: - * - * -# @ref NEGEMMInterleave4x4Kernel - * -# @ref NEGEMMTranspose1xWKernel - * -# @ref NEGEMMLowpMatrixMultiplyKernel - * -# @ref NEGEMMLowpOffsetContributionKernel - * -# @ref NEActivationLayer - * - * otherwise if the DOT product instruction is available: - * - * -# @ref NEGEMMLowpOffsetContributionKernel - * -*/ -class NEGEMMLowpMatrixMultiplyCoreEx : public IFunction -{ -public: - /** Constructor */ - NEGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpMatrixMultiplyCoreEx(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete; - /** Default move constructor */ - NEGEMMLowpMatrixMultiplyCoreEx(NEGEMMLowpMatrixMultiplyCoreEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpMatrixMultiplyCoreEx &operator=(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete; - /** Default move assignment operator */ - NEGEMMLowpMatrixMultiplyCoreEx &operator=(NEGEMMLowpMatrixMultiplyCoreEx &&) = default; - /** Initialise the kernel's inputs, output - * - * @note GEMM_LOWP: low precision GEMM kernel - * This kernel performs the following computations: - * - * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them. - * -# Convert b values from QASYMM8 to int32 add b_offset to each of them. - * -# Compute the matrix product of the resulting a * b in int32. - * - * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is - * QASYMM8/QASYMM8_SIGNED otherwise - * - * @param[in] a First input tensor (Matrix A). Data type supported: - * QASYMM8/QASYMM8_SIGNED. - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: - * S32 - * @param[out] output Output tensor. Data type supported: Data type supported: - * S32/QASYMM8/QASYMM8_SIGNED - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped - * and - * if the reshape of matrix B should be executed only for the first run - */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, - const GEMMInfo &gemm_info = GEMMInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEGEMMLowpMatrixMultiplyCoreEx - * - * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is - * QASYMM8/QASYMM8_SIGNED otherwise - * - * @param[in] a First input tensor info (Matrix A). Data type supported: - * QASYMM8/QASYMM8_SIGNED. - * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type - * supported: S32 - * @param[in] output Output tensor info. Data type supported: Data type supported: - * S32/QASYMM8/QASYMM8_SIGNED - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped - * and - * if the reshape of matrix B should be executed only for the first run - * - * @return a status - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, - const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo()); - - // Inherited methods overridden - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr<INEKernel> _mm_kernel; - std::unique_ptr<INEKernel> _mtx_a_reshape_kernel; - std::unique_ptr<INEKernel> _mtx_b_reshape_kernel; - NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel; - NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; - - Tensor _vector_sum_col; - Tensor _vector_sum_row; - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _mm_result_s32; - Tensor _signed_a; - Tensor _signed_output; - const ITensor *_original_b; - int32_t _a_offset; - int32_t _b_offset; - - bool _run_vector_matrix_multiplication; - bool _assembly_path; - bool _fused_assembly_path; - bool _reshape_b_only_on_first_run; - bool _is_prepared; - bool _fuse_output_stage; - bool _flip_signedness; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h deleted file mode 100644 index ca8413352..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEPRELU_H__ -#define __ARM_COMPUTE_NEPRELU_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEPReLUKernel */ -class NEPReLU : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs and output - * - * @param[in] input. Data types supported: QASYMM8/F32. - * @param[in] alpha. Data types supported: Same as @p input. - * @param[out] output Output tensor. Data types supported: Same as @p input. - */ - void configure(const ITensor *input, const ITensor *alpha, ITensor *output); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEPRELU_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h deleted file mode 100644 index 8a7b17946..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NERNNLAYER_EX_H__ -#define __ARM_COMPUTE_NERNNLAYER_EX_H__ - -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to run @ref NERNNLayerEx */ -class NERNNLayerEx : public IFunction -{ -public: - /** Default constructor */ - NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERNNLayerEx(const NERNNLayerEx &) = delete; - /** Default move constructor */ - NERNNLayerEx(NERNNLayerEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERNNLayerEx &operator=(const NERNNLayerEx &) = delete; - /** Default move assignment operator */ - NERNNLayerEx &operator=(NERNNLayerEx &&) = default; - /** Initialize the function - * - * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data - * types supported: F16/F32 - * @param[in] weights Weights tensor of shape [input_size, num_units] that - * multiplies the input. Data types supported: Same as @p input - * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies - * the current 'state'. Data types supported: Same as @p input - * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same - * as @p input - * @param[out] output Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] info Activation layer parameter. - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights, - const ITensor *bias, ITensor *hidden_state, ITensor *output, - ActivationLayerInfo &info); - /** Initialize the function - * - * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data - * types supported: F16/F32 - * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies - * the input. Data types supported: Same as @p input - * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the - * current 'state'. Data types supported: Same as @p input - * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p - * input - * @param[in] output Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] info Activation layer parameter. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *recurrent_weights, const ITensorInfo *bias, - const ITensorInfo *hidden_state, const ITensorInfo *output, - const ActivationLayerInfo &info); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - NEGEMM _gemm_state_f; - NEArithmeticAdditionKernel _add_kernel; - NEActivationLayerKernel _activation_kernel; - NEFullyConnectedLayer _fully_connected_kernel; - NECopyKernel _copy_kernel; - Tensor _fully_connected_out; - Tensor _gemm_output; - Tensor _add_output; - bool _is_prepared; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NERNNLAYER_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h deleted file mode 100644 index 03ac45798..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ -#define __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform reduce operation */ -class NEReduceMeanEx : public IFunction -{ -public: - /** Constructor */ - NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 - * @param[in] reduction_axis Reduction axis vector. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[out] output Destination tensor. Data type supported: Same as @p input - */ - void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEReduceMeanEx - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 - * @param[in] reduction_axis Reduction axis vector. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[in] output Destination tensor. Data type supported: Same as @p input - * - * @return A status - */ - static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::unique_ptr<NEReductionOperation[]> _reduction_kernels{nullptr}; - std::unique_ptr<Tensor[]> _reduced_outs{nullptr}; - NEReshapeLayer _reshape; - unsigned int _reduction_ops; - bool _keep_dims; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h deleted file mode 100644 index 3b695fbc0..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ -#define __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to spatial divide a tensor. This function calls the following NEON - * kernels/functions: - * - * -# @ref NEMemsetKernel - * -# @ref NESpaceToBatchLayerKernel - */ -class NESpaceToBatchLayerEx : public IFunction -{ -public: - /** Default constructor */ - NESpaceToBatchLayerEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerEx(const NESpaceToBatchLayerEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerEx &operator=(const NESpaceToBatchLayerEx &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerEx(NESpaceToBatchLayerEx &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerEx &operator=(NESpaceToBatchLayerEx &&) = default; - /** Default destructor */ - virtual ~NESpaceToBatchLayerEx() = default; - /** Set the input and output tensors. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, - ITensor *output); - /** Set the input and output tensors. (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, - const Size2D &padding_left, const Size2D &padding_right, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * NESpaceToBatchLayerEx - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32 - * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32 - * @param[in] output Tensor output info. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, - const ITensorInfo *paddings, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * NESpaceToBatchLayerEx (Static block shape and paddings) - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[in] output Tensor output info. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, - const Size2D &padding_left, const Size2D &padding_right, - const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ - NEMemsetKernel _memset_kernel; /**< Memset kernel to run */ - bool _has_padding; /**< Flag to check if the output has padding */ -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h deleted file mode 100644 index 9f32616f3..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ -#define __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** This function calls the following NEON kernels/functions: - * - * -# @ref NESpaceToDepthLayerKernelEx - */ -class NESpaceToDepthLayerEx : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref - * NESpaceToDepthLayerEx (Static block shape and paddings) - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h index 408d150d0..24ff5dac9 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,16 +37,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ #define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ -#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h" +#include "arm_compute/runtime/CPP/functions/CPPUpsample.h" #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEPermute.h" +#include "arm_compute/runtime/NEON/functions/NEReverse.h" -#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -59,8 +57,8 @@ namespace arm_compute { /** Function to run the deconvolution layer. * - * Transpose convolution Layer is the backward pass of Convolution Layer. First we transform the - * input depending on the stride and pad info and then perfrom a 1x1 + * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input + * depending on the stride and pad info and then perfrom a 1x1 * convolution pass. Input stride defines how many zeroes we should put between each element of the * input, pad is the amount of padding and finaly a is a user * specified value where a < stride - 1 that increases the padding top and right of the input image. @@ -81,21 +79,22 @@ namespace arm_compute * kernel_x and kernel_y are the convolution sizes in x and y. * stride_x and stride_y is the input stride of the first and second dimension. * - * The weights used by Transpose convolution are supposed to be the same as the ones used for - * Convolution. Therefore, it will be necessary to use the weights in the - * reverse order to perform an actual convolution. This is achieved by using the @ref - * CPPFlipWeightsKernel. + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. + * Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse. * * This function calls the following NEON kernels/functions: * - * -# @ref CPPUpsample + * -# @ref CPPUpsampleEx * -# @ref NEConvolutionLayer + * -# @ref NEPermute + * -# @ref NEReverse * */ class NETransposeConvLayer : public IFunction { public: - /** Default constructor */ + /** Constructor */ NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -112,37 +111,38 @@ public: /** Set the input, weights, biases and output tensors. * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. + * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. + * supported: Same as @p input. * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type - * supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. + * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 + * for F16 input. * @param[out] output Output tensor. The output has the same number of dimensions as the @p - * input. + * input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to top edge of the output. + * decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. * */ void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom); /** Static function to check if given info will lead to a valid configuration of @ref - * NETransposeConvLayer + * NETransposeConvLayer * * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. + * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. + * supported: Same as @p input. * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types - * supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. + * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. * @param[in] output Output tensor info. The output has the same number of dimensions as the @p - * input. + * input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] innvalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to top edge of the output. + * decribed in @ref PadStrideInfo. + * @param[in] innvalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. * * @return a status */ @@ -158,17 +158,11 @@ public: private: MemoryGroup _memory_group; NEConvolutionLayer _conv_f; - CPPUpsampleEx _upsample_f; - CPPFlipWeightsKernel _flip_weights; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; + CPPUpsample _upsample_f; + NEReverse _flip_weights; Tensor _scaled_output; Tensor _weights_flipped; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_nchw; + Tensor _flip_axis; const ITensor *_original_weights; ITensor *_input; PadStrideInfo _info; diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp index 7b6b9742b..ba42a2456 100644 --- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp @@ -55,16 +55,7 @@ using namespace arm_compute; const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = { // ARMComputeEx kernels - {"arg_op", "arg_operation.cl"}, - {"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"}, {"binary_logical_op", "binary_logical_op.cl"}, - {"cast", "cast.cl"}, - {"cast_qasymm_in", "cast.cl"}, - {"cast_qasymm_out", "cast.cl"}, - {"comparison_op", "comparison_op.cl"}, - {"comparison_op_qasymm8", "comparison_op_quantized.cl"}, - {"depth_to_space_nchw", "depth_to_space.cl"}, - {"depth_to_space_nhwc", "depth_to_space.cl"}, {"embedding_lookup", "embedding_lookup.cl"}, {"gather_ex", "gather_ex.cl"}, {"gather_ex_1d", "gather_ex.cl"}, @@ -74,10 +65,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map {"instance_normalization_ex", "instance_normalization_ex.cl"}, {"multiply_scale_factor", "multiply_scale_factor.cl"}, {"neg_tensor", "neg_tensor.cl"}, - {"permute_generic", "permute_ex.cl"}, - {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"}, - {"prelu", "prelu.cl"}, - {"prelu_qasymm8", "prelu_quantized.cl"}, {"quantization_symm8", "quantization_symm8.cl"}, {"reduce_min_max", "reduce_operation.cl"}, {"reduce_sum_mean", "reduce_operation.cl"}, @@ -91,29 +78,15 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map {"radixsort_reorder", "topkv2_radixsort.cl"}, {"topkv2_quicksort", "topkv2_quicksort.cl"}, {"scale_factor_symm8", "scale_factor.cl"}, - {"space_to_depth_nchw", "space_to_depth.cl"}, - {"space_to_depth_nhwc", "space_to_depth.cl"}, }; const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = { #ifdef EMBEDDED_KERNELS { - "arg_operation.cl", -#include "./cl_kernels/arg_operation.clembed" - }, - { - "cast.cl", -#include "./cl_kernels/cast.clembed" - }, - { "embedding_lookup.cl", #include "./cl_kernels/embedding_lookup.clembed" }, { - "depth_to_space.cl", -#include "./cl_kernels/depth_to_space.clembed" - }, - { "gather_ex.cl", #include "./cl_kernels/gather_ex.clembed" }, @@ -150,14 +123,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map #include "./cl_kernels/neg_tensor.clembed" }, { - "prelu.cl", -#include "./cl_kernels/prelu.clembed" - }, - { - "prelu_quantized.cl", -#include "./cl_kernels/prelu_quantized.clembed" - }, - { "quantization_symm8.cl", #include "./cl_kernels/quantization_symm8.clembed" }, @@ -170,10 +135,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map #include "./cl_kernels/scale_factor.clembed" }, { - "space_to_depth.cl", -#include "./cl_kernels/space_to_depth.clembed" - }, - { "topkv2.cl", #include "./cl_kernels/topkv2.clembed" }, diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl deleted file mode 100644 index 03717cfe9..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) -/** Perform arg_max/arg_min - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. - * e.g. -DDATA_TYPE=short - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. - * e.g. -DDEPTH_OUT=16 - * @attention Operation type(code) specifying which operation to perform should be passed as - * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1 - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: - * U8/QASYMM8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension - * (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension - * (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element - * in the source image - * @param[in] input_stride_w Stride of the source tensor in W dimension - * (in bytes) - * @param[in] input_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[out] output_ptr Pointer to the destination image. - * Supported data types: U32 - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension - * (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - * @param[in] axis Axis through which reduction occurs - * @param[in] dim Dimension across the axis to be reduced. - */ - -__kernel void arg_op(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output), const int axis, - const int dim) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int indices[4] = { - get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT, - get_global_id(2) / DEPTH_OUT, - }; - - DATA_TYPE value = - *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])); - DATA_TYPE tval = value; - int idx = 0; - for (int i = 1; i < dim; ++i) - { - indices[axis] = i; - -#if OP_CODE == 1 // ArgMax - value = max(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], - indices[2], indices[3]))); -#elif OP_CODE == 2 // ArgMin - value = min(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], - indices[2], indices[3]))); -#else - return; - -#endif - - if (tval != value) - { - idx = indices[axis]; - tval = value; - } - } - - *((__global uint *)out.ptr) = idx; -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl deleted file mode 100644 index f74c1c103..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers_asymm.h" - -#ifdef SATURATE -#define ADD(x, y) add_sat((x), (y)) -#define SUB(x, y) sub_sat((x), (y)) -#else /* SATURATE */ -#define ADD(x, y) (x) + (y) -#define SUB(x, y) (x) - (y) -#endif /* SATURATE */ - -/** Performs a pixelwise addition used to quantize down the int32 accumulator values of GEMMLowp to - * QASYMM8 - * - * The following computations will be performed: - * - * -# Add offset terms to inputs - -# Get scaled value of two inputs - * -# Add inputs - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Shift the int32 accumulator by result_shift - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. - * - * @attention The inputs and output data types need to be passed at compile time using - * -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar - * @attention The number of bits to shift left of input tensors must be passed at compile time using - * -DLEFT_SHIFT - * @attention The offset, scalar scale factor and number of bits to shift right of input tensors - * must be passed at compile time using -DIN1_OFFSET, -RIN1_MULT_INT, -DIN1_SHIFT, - -DIN2_OFFSET, - * -RIN2_MULT_INT and -DIN2_SHIFT - * @attention The offset, scalar scale factor and number of bits to shift right of output tensor - * must be passed at compile time using -DRESULT_OFFSET, -RESULT_MULT_INT and - -DRESULT_SHIFT - * - * @attention The input and output data_types need to be passed at compile time using - * -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar - * @attention The inputs and output scale information of qasymm8 need to be passed at compile time - * using -DSCALE_IN1, -DSCALE_IN2 and -DSCALE_OUT: - * e.g. -DSCALE_IN1=1.f -DSCALE_IN2=1.f -DSCALE_OUT=2.f - * @attention The inputs and output scale offset need to be passed at compile time using - * -DOFFSET_IN1, -DOFFSET_IN2 and -DOFFSET_OUT: - * e.g. -DOFFSET_IN1=0 -DOFFSET_IN2=0 -DOFFSET_OUT=0 - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise - * wrapping policy will be used. - * - * @param[in] in1_ptr Pointer to the source tensor. - * Supported data types: QASYMM8 - * @param[in] in1_stride_x Stride of the source tensor in X dimension - * (in bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed - * per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source tensor in Y dimension - * (in bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed - * per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Z processed - * per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[in] in2_ptr Pointer to the source tensor. Supported data types: - * QASYMM8 - * @param[in] in2_stride_x Stride of the source tensor in X dimension - * (in bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed - * per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source tensor in Y dimension - * (in bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed - * per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Z processed - * per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source - * tensor - * @param[out] out_ptr Pointer to the destination tensor. - * Supported data types: QASYMM8 - * @param[in] out_stride_x Stride of the destination tensor in X dimension - * (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed - * per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination tensor in Y dimension - * (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed - * per workitem(in bytes) - * @param[in] out_stride_z Stride of the source tensor in Z dimension - * (in bytes) - * @param[in] out_step_z out_stride_z * number of elements along Z processed - * per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination - * tensor - */ -__kernel void arithmetic_add_qasymm8(TENSOR3D_DECLARATION(in1), TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out)) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load data - VEC_DATA_TYPE(int, 16) - in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(int, 16)); - VEC_DATA_TYPE(int, 16) - in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(int, 16)); - - // Get scaled value of two inputs - VEC_DATA_TYPE(int, 16) in1_val = in1_data + (VEC_DATA_TYPE(int, 16))(IN1_OFFSET); - VEC_DATA_TYPE(int, 16) in2_val = in2_data + (VEC_DATA_TYPE(int, 16))(IN2_OFFSET); - - VEC_DATA_TYPE(int, 16) - left_shift = (VEC_DATA_TYPE(int, 16))1 << (VEC_DATA_TYPE(int, 16))(LEFT_SHIFT); - VEC_DATA_TYPE(int, 16) shifted_in1_val = in1_val * left_shift; - VEC_DATA_TYPE(int, 16) shifted_in2_val = in2_val * left_shift; - - VEC_DATA_TYPE(int, 16) - scaled_in1_val = - ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in1_val, IN1_MULT_INT, IN1_SHIFT, 16); - VEC_DATA_TYPE(int, 16) - scaled_in2_val = - ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in2_val, IN2_MULT_INT, IN2_SHIFT, 16); - - // Add inputs and multiply with a multiplier smaller than 1 - VEC_DATA_TYPE(int, 16) sum_val = scaled_in1_val + scaled_in2_val; - VEC_DATA_TYPE(int, 16) - out_val = - ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(sum_val, RESULT_MULT_INT, RESULT_SHIFT, 16); - out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET); - - VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16)); - - // TODO: Apply min-max BOUND to support fuse with relu. - /* - #if defined(MIN_BOUND) - res = max(res, (uchar16)MIN_BOUND); - #endif // defined(MIN_BOUND) - #if defined(MAX_BOUND) - res = min(res, (uchar16)MAX_BOUND); - #endif // defined(MAX_BOUND) - */ - - // Store result - VSTORE(16)(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); -} diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl deleted file mode 100644 index 4147a0017..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#ifndef SCALE -#define SCALE 1.0f -#endif -#ifndef OFFSET -#define OFFSET 0 -#endif -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) -/** Perform a cast operation on an input tensor. - * - * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and - * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @attention -DBOOL_INPUT : Whether type of input is bool. - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void cast(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VSTORE(VEC_SIZE) - (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr), - VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), - 0, (__global DATA_TYPE_OUT *)output.ptr); - VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE) - res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr), - VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)); -#if defined(BOOL_INPUT) - VEC_DATA_TYPE(char, VEC_SIZE) tmp = CONVERT(res, VEC_DATA_TYPE(char, VEC_SIZE)); - VEC_DATA_TYPE(char, VEC_SIZE) mask = (VEC_DATA_TYPE(char, VEC_SIZE))(1); - res = CONVERT(tmp & mask, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)); -#endif // defined(BOOL_INPUT) - - VSTORE(VEC_SIZE)(res, 0, (__global DATA_TYPE_OUT *)output.ptr); -} - -/** Perform a cast operation on an QASYMM8 input tensor. - * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and - * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int - * @attention Offset and Scale of input should be given as a preprocessor argument using - * -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5 - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void cast_qasymm_in(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) - in_data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr); - VEC_DATA_TYPE(int, VEC_SIZE) offset = (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET); - VEC_DATA_TYPE(float, VEC_SIZE) scale = (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE); - - VEC_DATA_TYPE(int, VEC_SIZE) tmp = CONVERT(in_data, VEC_DATA_TYPE(int, VEC_SIZE)) - offset; - VEC_DATA_TYPE(float, VEC_SIZE) out_data = CONVERT(tmp, VEC_DATA_TYPE(float, VEC_SIZE)) * scale; - - VSTORE(VEC_SIZE) - (CONVERT(out_data, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, - (__global DATA_TYPE_OUT *)output.ptr); -} - -/** Perform a cast operation on an QASYMM8 output tensor. - * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and - * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int - * @attention Offset and Scale of output should be given as a preprocessor argument using - * -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5 - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported data - * types: U8 - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void cast_qasymm_out(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) - in_data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr); - VEC_DATA_TYPE(int, VEC_SIZE) offset = (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET); - VEC_DATA_TYPE(float, VEC_SIZE) scale = (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE); - - VEC_DATA_TYPE(float, VEC_SIZE) tmp = CONVERT(in_data, VEC_DATA_TYPE(float, VEC_SIZE)) / scale; - VEC_DATA_TYPE(float, VEC_SIZE) out_data = tmp + CONVERT(offset, VEC_DATA_TYPE(float, VEC_SIZE)); - - VSTORE(VEC_SIZE) - (CONVERT(out_data, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, - (__global DATA_TYPE_OUT *)output.ptr); -} -#endif // defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl deleted file mode 100644 index 0285c955b..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT) -/** Perform space to depth rearrangement of tensor - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. - * e.g. -DDEPTH_OUT=16 - * @attention The value of the z-axis of output tensor should be given as a preprocessor argument - * using -DZ_OUT=size. e.g. -DZ_OUT=16 - * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g. - * -DBLOCK_SIZE=1 - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void depth_to_space_nchw(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output)) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, Z_OUT); - - int out_index[4] = {0}; - int in_index[4] = {0}; - - out_index[0] = get_global_id(0); // W - out_index[1] = get_global_id(1); // H - out_index[2] = get_global_id(2) % Z_OUT; // C - out_index[3] = get_global_id(2) / Z_OUT; // B - - in_index[0] = out_index[0] / BLOCK_SIZE; - in_index[1] = out_index[1] / BLOCK_SIZE; - in_index[2] = out_index[2] + - ((out_index[1] % BLOCK_SIZE) * BLOCK_SIZE + out_index[0] % BLOCK_SIZE) * DEPTH_OUT; - in_index[3] = out_index[3]; - - *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset( - &in, in_index[0], in_index[1], in_index[2], in_index[3])); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT) - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT) -/** Perform space to depth rearrangement of tensor (NHWC) - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. - * e.g. -DDEPTH_OUT=16 - * @attention The value of the z-axis of output tensor should be given as a preprocessor argument - * using -DZ_OUT=size. e.g. -DZ_OUT=16 - * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g. - * -DBLOCK_SIZE=1 - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void depth_to_space_nhwc(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output)) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, Z_OUT); - - int out_index[4] = {0}; - int in_index[4] = {0}; - - out_index[0] = get_global_id(0); // C - out_index[1] = get_global_id(1); // W - out_index[2] = get_global_id(2) % Z_OUT; // H - out_index[3] = get_global_id(2) / Z_OUT; // B - - in_index[0] = out_index[0] + - ((out_index[2] % BLOCK_SIZE) * BLOCK_SIZE + out_index[1] % BLOCK_SIZE) * DEPTH_OUT; - in_index[1] = out_index[1] / BLOCK_SIZE; - in_index[2] = out_index[2] / BLOCK_SIZE; - in_index[3] = out_index[3]; - - *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset( - &in, in_index[0], in_index[1], in_index[2], in_index[3])); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h index 2d0b6a299..e07a25ec9 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #ifndef ARM_COMPUTE_HELPER_H #define ARM_COMPUTE_HELPER_H @@ -59,16 +58,219 @@ #pragma OPENCL EXTENSION cl_arm_printf : enable #endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf) +#define GPU_ARCH_MIDGARD 0x100 +#define GPU_ARCH_BIFROST 0x200 + +/** Concatenate two inputs. + * + * @param[in] a The first input to be concatenated + * @param[in] b The second input to be concatenated + * + * @return The concatenated output + */ +#define CONCAT(a, b) a##b + +/** Expand the given vector + * + * @param[in] x The vector to be expanded + * + * @return The expanded output + */ #define EXPAND(x) x +/** Clamp the given value between an upper and lower bound. + * + * @param[in] x The value to be clamped + * @param[in] min_val The lower bound + * @param[in] max_val The upper bound + * + * @return The clamped value. + */ #define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val) +/** REVn reverses the given vector whose size is n. + * @name REVn + * + * @param[in] x The vector to be reversed + * + * @return The reversed vector + * @{ + */ +#define REV1(x) ((x)) +#define REV2(x) ((x).s10) +#define REV3(x) ((x).s210) +#define REV4(x) ((x).s3210) +#define REV8(x) ((x).s76543210) +#define REV16(x) ((x).sFEDCBA9876543210) +/** @} */ // end of group REVn + +/** Reverse the given vector. + * @name REVERSE + * + * @param[in] x The vector to be reversed + * @param[in] s The size of the vector + * + * @return The reversed vector + * @{ + */ +#define REVERSE_STR(x, s) REV##s((x)) +#define REVERSE(x, s) REVERSE_STR(x, s) +/** @} */ // end of group REVERSE + +/** Circular-right-shift (rotate-right) the vector of size s by the amount of n. + * @name ROTs_n + * + * @param[in] x The vector to be shifted + * + * @return The shifted vector + * @{ + */ +#define ROT1_0(x) ((x)) + +#define ROT2_0(x) ((x)) +#define ROT2_1(x) ((x).s10) + +#define ROT3_0(x) ((x)) +#define ROT3_1(x) ((x).s201) +#define ROT3_2(x) ((x).s120) + +#define ROT4_0(x) ((x)) +#define ROT4_1(x) ((x).s3012) +#define ROT4_2(x) ((x).s2301) +#define ROT4_3(x) ((x).s1230) + +#define ROT8_0(x) ((x)) +#define ROT8_1(x) ((x).s70123456) +#define ROT8_2(x) ((x).s67012345) +#define ROT8_3(x) ((x).s56701234) +#define ROT8_4(x) ((x).s45670123) +#define ROT8_5(x) ((x).s34567012) +#define ROT8_6(x) ((x).s23456701) +#define ROT8_7(x) ((x).s12345670) + +#define ROT16_0(x) ((x)) +#define ROT16_1(x) ((x).sF0123456789ABCDE) +#define ROT16_2(x) ((x).sEF0123456789ABCD) +#define ROT16_3(x) ((x).sDEF0123456789ABC) +#define ROT16_4(x) ((x).sCDEF0123456789AB) +#define ROT16_5(x) ((x).sBCDEF0123456789A) +#define ROT16_6(x) ((x).sABCDEF0123456789) +#define ROT16_7(x) ((x).s9ABCDEF012345678) +#define ROT16_8(x) ((x).s89ABCDEF01234567) +#define ROT16_9(x) ((x).s789ABCDEF0123456) +#define ROT16_10(x) ((x).s6789ABCDEF012345) +#define ROT16_11(x) ((x).s56789ABCDEF01234) +#define ROT16_12(x) ((x).s456789ABCDEF0123) +#define ROT16_13(x) ((x).s3456789ABCDEF012) +#define ROT16_14(x) ((x).s23456789ABCDEF01) +#define ROT16_15(x) ((x).s123456789ABCDEF0) +/** @} */ // end of group ROTs_n + +/** Circular-right-shift (rotate-right) the given vector by the given amount. + * @name ROTATE + * + * @param[in] x The vector to be shifted + * @param[in] s The size of the vector + * @param[in] n The amount to be shifted + * + * @return The shifted vector + * @{ + */ +#define ROTATE_STR(x, s, n) ROT##s##_##n(x) +#define ROTATE(x, s, n) ROTATE_STR(x, s, n) +/** @} */ // end of group ROTATE + +/** Creates a vector of size n filled with offset values corresponding to the location of each + * element. + * @name V_OFFSn + * + * @param[in] dt The data type of the output vector + * + * @return The vector filled with offset values + * @{ + */ +#define V_OFFS1(dt) (dt)(0) +#define V_OFFS2(dt) (dt)(0, 1) +#define V_OFFS3(dt) (dt)(0, 1, 3) +#define V_OFFS4(dt) (dt)(0, 1, 2, 3) +#define V_OFFS8(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7) +#define V_OFFS16(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) +/** @} */ // end of group V_OFFSn + +/** Create a vector filled with offset values corresponding to the location of each element. + * @name VEC_OFFS + * + * @param[in] dt The data type of the output vector + * @param[in] s The size of the output vector + * + * @return The vector filled with offset values + * @{ + */ +#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt) +#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s) +/** @} */ // end of group VEC_OFFS + #define VLOAD_STR(size) vload##size #define VLOAD(size) VLOAD_STR(size) #define VSTORE_STR(size) vstore##size #define VSTORE(size) VSTORE_STR(size) +#define float1 float +#define half1 half +#define char1 char +#define uchar1 uchar +#define short1 short +#define ushort1 ushort +#define int1 int +#define uint1 uint +#define long1 long +#define ulong1 ulong +#define double1 double + +#define vload1(OFFSET, PTR) *(OFFSET + PTR) +#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA + +// Convert built-in functions with _sat modifier are not supported in floating point so we create +// defines +// without _sat to overcome this issue +#define convert_float_sat convert_float +#define convert_float1_sat convert_float +#define convert_float2_sat convert_float2 +#define convert_float3_sat convert_float3 +#define convert_float4_sat convert_float4 +#define convert_float8_sat convert_float8 +#define convert_float16_sat convert_float16 +#define convert_half_sat convert_float +#define convert_half1_sat convert_half +#define convert_half2_sat convert_half2 +#define convert_half3_sat convert_half3 +#define convert_half4_sat convert_half4 +#define convert_half8_sat convert_half8 +#define convert_half16_sat convert_half16 + +#define convert_float1 convert_float +#define convert_half1 convert_half +#define convert_char1 convert_char +#define convert_uchar1 convert_uchar +#define convert_short1 convert_short +#define convert_ushort1 convert_ushort +#define convert_int1 convert_int +#define convert_uint1 convert_uint +#define convert_long1 convert_long +#define convert_ulong1 convert_ulong +#define convert_double1 convert_double + +#define convert_char1_sat convert_char_sat +#define convert_uchar1_sat convert_uchar_sat +#define convert_short1_sat convert_short_sat +#define convert_ushort1_sat convert_ushort_sat +#define convert_int1_sat convert_int_sat +#define convert_uint1_sat convert_uint_sat +#define convert_long1_sat convert_long_sat +#define convert_ulong1_sat convert_ulong_sat +#define convert_double1_sat convert_double_sat + #define VEC_DATA_TYPE_STR(type, size) type##size #define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h index a83b1a8a5..5f1b3f902 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,29 +37,112 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #ifndef ARM_COMPUTE_HELPERS_ASYMM_H #define ARM_COMPUTE_HELPERS_ASYMM_H #include "helpers.h" +/** Convert the given vector with round to nearest even rounding mode + * + * @param[in] x The target to be converted + * @param[in] type The target type + * + * @return The converted vector + */ +#define CONVERT_DOWN_RTE_STR(x, type) (convert_##type##_rte((x))) +#define CONVERT_DOWN_RTE(x, type) CONVERT_DOWN_RTE_STR(x, type) + +/** Quantize a floating-point scalar value to 8-bit asymmetric + * + * @param[in] input Input value to quantize + * @param[in] offset Quantization offset + * @param[in] scale Quantization scale + * + * @return quantized value + */ +inline uchar quantize_qasymm8(float input, float offset, float scale) +{ + float out_f32 = input / scale + offset; + uchar res_u8 = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, int), uchar); + return res_u8; +} + +/** Dequantize a scalar value from 8-bit asymmetric to floating-point + * + * @param[in] input Input value to quantize + * @param[in] offset Quantization offset + * @param[in] scale Quantization scale + * + * @return quantized value + */ +inline float dequantize_qasymm8(uchar input, float offset, float scale) +{ + return ((float)input - offset) * scale; +} + +/** Dequantize a scalar value from signed 8-bit asymmetric to floating-point + * + * @param[in] input Input value to quantize + * @param[in] offset Quantization offset + * @param[in] scale Quantization scale + * + * @return quantized value + */ +inline float dequantize_qasymm8_signed(char input, float offset, float scale) +{ + return ((float)input - offset) * scale; +} + +/** Quantize a vector of values from floating-point + * + * @param[in] type Output data type. + * @param[in] size Size of vector. + * + * @return quantized values + */ +#define QUANTIZE_IMPL(type, size) \ + inline VEC_DATA_TYPE(type, size) \ + quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \ + { \ + VEC_DATA_TYPE(float, size) \ + out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \ + VEC_DATA_TYPE(type, size) \ + res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), \ + VEC_DATA_TYPE(type, size)); \ + return res; \ + } + +/** Dequantize a vector of values to floating-point + * + * @param[in] type Input data type. + * @param[in] size Size of vector. + * + * @return dequantized values in floating point + */ +#define DEQUANTIZE_IMPL(type, size) \ + inline VEC_DATA_TYPE(float, size) \ + dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \ + { \ + return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \ + } + /** Correctly-rounded-to-nearest division by a power-of-two. * * @param[in] size Size of vector. * * @return Correctly-rounded-to-nearest division by a power-of-two. */ -#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_rounding_divide_by_POW2_##size(VEC_DATA_TYPE(int, size) x, int exponent) \ - { \ - VEC_DATA_TYPE(int, size) \ - mask = (1 << exponent) - 1; \ - const VEC_DATA_TYPE(int, size) zero = 0; \ - const VEC_DATA_TYPE(int, size) one = 1; \ - VEC_DATA_TYPE(int, size) \ - threshold = (mask >> 1) + select(zero, one, x < 0); \ - return (x >> exponent) + select(zero, one, (x & mask) > threshold); \ +#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \ + inline VEC_DATA_TYPE(int, size) asymm_rounding_divide_by_POW2_##size( \ + VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \ + { \ + const VEC_DATA_TYPE(int, size) zero = (VEC_DATA_TYPE(int, size))0; \ + const VEC_DATA_TYPE(int, size) one = (VEC_DATA_TYPE(int, size))1; \ + VEC_DATA_TYPE(int, size) \ + mask = (one << exponent) - one; \ + VEC_DATA_TYPE(int, size) \ + threshold = (mask >> 1) + select(zero, one, x < 0); \ + return (x >> exponent) + select(zero, one, (x & mask) > threshold); \ } /** Product of two numbers, interpreting them as fixed-point values in the interval [-1, 1), @@ -81,9 +164,19 @@ b_64 = convert_long##size(b); \ VEC_DATA_TYPE(long, size) \ ab_64 = a_64 * b_64; \ - /* COMPMID-907 */ \ + /* Revert COMPMID-907 */ \ + VEC_DATA_TYPE(long, size) \ + mask1 = 1 << 30; \ + VEC_DATA_TYPE(long, size) \ + mask2 = 1 - (1 << 30); \ + VEC_DATA_TYPE(long, size) \ + is_positive_or_zero = ab_64 >= 0; \ + VEC_DATA_TYPE(long, size) \ + nudge = select(mask2, mask1, is_positive_or_zero); \ + VEC_DATA_TYPE(long, size) \ + mask = 1ll << 31; \ VEC_DATA_TYPE(int, size) \ - ab_x2_high32 = convert_int##size(((ab_64 + (1 << 30)) >> 31)); \ + ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \ return select(ab_x2_high32, INT_MAX, overflow); \ } @@ -335,9 +428,18 @@ return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(value, exponent, size); \ } +#define QUANTIZE_STR(input, offset, scale, type, size) quantize_##type##size(input, offset, scale) +#define QUANTIZE(input, offset, scale, type, size) QUANTIZE_STR(input, offset, scale, type, size) +#define DEQUANTIZE_STR(input, offset, scale, type, size) \ + dequantize_##type##size(input, offset, scale) +#define DEQUANTIZE(input, offset, scale, type, size) \ + DEQUANTIZE_STR(input, offset, scale, type, size) + #define ASYMM_ROUNDING_DIVIDE_BY_POW2(x, exponent, size) \ asymm_rounding_divide_by_POW2_##size(x, exponent) #define ASYMM_MULT(a, b, size) asymm_mult##size(a, b) +#define ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(x, quantized_multiplier, left_shift, size) \ + ASYMM_MULT(x *((VEC_DATA_TYPE(int, size))(1) << (-left_shift)), quantized_multiplier, size) #define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(x, quantized_multiplier, right_shift, size) \ ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(x, quantized_multiplier, size), right_shift, size) #define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL(a, size) \ @@ -360,11 +462,53 @@ #define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \ asymm_rescale##size(value, src_integer_bits, dst_integer_bits) +#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \ + inline VEC_DATA_TYPE(int, size) \ + multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \ + { \ + const int left_shift = shift > 0 ? shift : 0; \ + const int right_shift = shift > 0 ? 0 : -shift; \ + return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \ + right_shift, size); \ + } +#define MULTIPLY_BY_QUANTIZED_MULTIPLIER(input, qmul, shift, size) \ + multiply_by_quantized_multiplier##size(input, qmul, shift) + +QUANTIZE_IMPL(uchar, 1) +QUANTIZE_IMPL(char, 1) +QUANTIZE_IMPL(uint, 1) +QUANTIZE_IMPL(int, 1) +QUANTIZE_IMPL(uchar, 4) +QUANTIZE_IMPL(ushort, 4) +QUANTIZE_IMPL(short, 4) +QUANTIZE_IMPL(uchar, 16) +QUANTIZE_IMPL(char, 16) +QUANTIZE_IMPL(ushort, 16) +QUANTIZE_IMPL(short, 16) +QUANTIZE_IMPL(uint, 16) +QUANTIZE_IMPL(int, 16) + +DEQUANTIZE_IMPL(uchar, 1) +DEQUANTIZE_IMPL(char, 1) +DEQUANTIZE_IMPL(uint, 1) +DEQUANTIZE_IMPL(int, 1) +DEQUANTIZE_IMPL(uchar, 4) +DEQUANTIZE_IMPL(ushort, 4) +DEQUANTIZE_IMPL(short, 4) +DEQUANTIZE_IMPL(uchar, 16) +DEQUANTIZE_IMPL(char, 16) +DEQUANTIZE_IMPL(ushort, 16) +DEQUANTIZE_IMPL(short, 16) +DEQUANTIZE_IMPL(uint, 16) +DEQUANTIZE_IMPL(int, 16) + +ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(1) ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2) ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4) ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8) ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16) +ASYMM_MULT_IMPL(1) ASYMM_MULT_IMPL(2) ASYMM_MULT_IMPL(4) ASYMM_MULT_IMPL(8) @@ -375,16 +519,19 @@ ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(4) ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(8) ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(16) +ASYMM_SELECT_USING_MASK_IMPL(1) ASYMM_SELECT_USING_MASK_IMPL(2) ASYMM_SELECT_USING_MASK_IMPL(4) ASYMM_SELECT_USING_MASK_IMPL(8) ASYMM_SELECT_USING_MASK_IMPL(16) +ASYMM_MASK_IF_ZERO_IMPL(1) ASYMM_MASK_IF_ZERO_IMPL(2) ASYMM_MASK_IF_ZERO_IMPL(4) ASYMM_MASK_IF_ZERO_IMPL(8) ASYMM_MASK_IF_ZERO_IMPL(16) +ASYMM_MASK_IF_NON_ZERO_IMPL(1) ASYMM_MASK_IF_NON_ZERO_IMPL(2) ASYMM_MASK_IF_NON_ZERO_IMPL(4) ASYMM_MASK_IF_NON_ZERO_IMPL(8) @@ -400,6 +547,7 @@ ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(4) ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(8) ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(16) +ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(1) ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(2) ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(4) ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(8) @@ -415,9 +563,16 @@ ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(4) ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(8) ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(16) +ASYMM_RESCALE_IMPL(1) ASYMM_RESCALE_IMPL(2) ASYMM_RESCALE_IMPL(4) ASYMM_RESCALE_IMPL(8) ASYMM_RESCALE_IMPL(16) +MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(1) +MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(2) +MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(4) +MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(8) +MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(16) + #endif // ARM_COMPUTE_HELPERS_ASYMM_H diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl deleted file mode 100644 index 12c8eeb79..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#ifndef VEC_SIZE -#define VEC_SIZE 1 -#endif - -#if defined(DATA_TYPE) -/** Returns result of prelu function implemented as below: - * f(input) = alpha * input for input < 0, f(input) = input for input >= 0. - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @note Can only take floating point data types. - * - * @param[in] input1_ptr Pointer to the source image. Supported Data - * types : F16/F32 - * @param[in] input1_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[in] alpha_ptr Pointer to the source image. Supported Data - * types : F16/F32 - * @param[in] alpha_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] alpha_step_x input2_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] alpha_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] alpha_step_y input2_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] alpha_step_z input2_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source - * image - * - * @param[out] output_ptr Pointer to the destination image. Supported - * data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void prelu(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(alpha), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VSTORE(VEC_SIZE) - (VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) < 0 - ? VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) * - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)alpha.ptr) - : VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr), - 0, (__global DATA_TYPE *)output.ptr); -} -#endif // defined(DATA_TYPE) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl deleted file mode 100644 index a66e107d1..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" -#define SUB(x, y) (x) - (y) - -#if defined(OFF_IN) && defined(OFF_ALPHA) && defined(OFF_OUT) && defined(SCALE_IN) && \ - defined(SCALE_ALPHA) && defined(SCALE_OUT) && defined(VEC_SIZE) - -#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE) -#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE) -#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE) -#define CONVERT_RTE(x, type) (convert_##type##_rte((x))) -#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type) -#define SELECT_TYPE VEC_INT - -/** Returns result of prelu function implemented as below: - * f(input) = alpha * input for input < 0, f(input) = input for input >= 0. - * - * @attention Data type can be passed using the -DDATA_TYPE_IN compile flag, e.g. - * -DDATA_TYPE_IN=uchar - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. - * -DVEC_SIZE=16 - * @note Can only take uchar data types. - * - * @param[in] input1_ptr Pointer to the source image. Supported Data - * types : QASYMM8 - * @param[in] input1_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input1_step_x input1_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input1_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input1_step_y input1_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input1_step_z input1_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[in] alpha_ptr Pointer to the source image. Supported Data - * types : QASYMM8 - * @param[in] alpha_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] alpha_step_x input2_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] alpha_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] alpha_step_y input2_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] alpha_step_z input2_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported - * data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void prelu_qasymm8(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(alpha), - TENSOR3D_DECLARATION(output)) -{ - // Get pixels pointer - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_INT in_vec = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)input.ptr), VEC_INT); - VEC_INT alpha_vec = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)alpha.ptr), VEC_INT); - - in_vec = SUB(in_vec, (VEC_INT)((int)OFF_IN)); - alpha_vec = SUB(alpha_vec, (VEC_INT)((int)OFF_ALPHA)); - - const VEC_FLOAT inf32 = CONVERT(in_vec, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN); - const VEC_FLOAT alphaf32 = CONVERT(alpha_vec, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_ALPHA); - const VEC_FLOAT outf32 = - select(inf32, inf32 * alphaf32, CONVERT(inf32 < (VEC_FLOAT)0, SELECT_TYPE)); - const VEC_FLOAT qresf32 = outf32 / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFF_OUT)); - const VEC_UCHAR res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR); - - VSTORE(VEC_SIZE) - (res, 0, (__global uchar *)output.ptr); -} - -#endif // defined(OFF_IN) && defined(OFF_ALPHA) && defined(OFF_OUT) && defined(SCALE_IN) && - // defined(SCALE_ALPHA) && defined(SCALE_OUT) && defined(VEC_SIZE) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl deleted file mode 100644 index eb612f834..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN) -/** Perform space to depth rearrangement of tensor - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. - * e.g. -DDEPTH_IN=16 - * @attention The value of the z-axis of input tensor depth should be given as a preprocessor - * argument using -DZ_IN=size. e.g. -DZ_IN=16 - * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g. - * -DBLOCK_SIZE=1 - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void space_to_depth_nchw(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output)) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, Z_IN); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0); - - int out_index[4] = {0}; - int in_index[4] = {0}; - - in_index[0] = get_global_id(0); // W - in_index[1] = get_global_id(1); // H - in_index[2] = get_global_id(2) % Z_IN; // C - in_index[3] = get_global_id(2) / Z_IN; // B - - out_index[0] = in_index[0] / BLOCK_SIZE; - out_index[1] = in_index[1] / BLOCK_SIZE; - out_index[2] = - in_index[2] + ((in_index[1] % BLOCK_SIZE) * BLOCK_SIZE + in_index[0] % BLOCK_SIZE) * DEPTH_IN; - out_index[3] = in_index[3]; - - *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0], out_index[1], out_index[2], - out_index[3])) = *((__global DATA_TYPE *)in.ptr); -} -#endif // defined(DATA_TYPE) && defined(Z_IN) && defined(BLOCK_SIZE) && defined(Z_IN) - -#if defined(DATA_TYPE) && defined(Z_IN) && defined(BLOCK_SIZE) && defined(Z_IN) -/** Perform space to depth rearrangement of tensor - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. - * e.g. -DDEPTH_IN=16 - * @attention The value of the z-axis of input tensor depth should be given as a preprocessor - * argument using -DZ_IN=size. e.g. -DZ_IN=16 - * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g. - * -DBLOCK_SIZE=1 - * - * @param[in] input_ptr Pointer to the source image. Supported data - * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in - * bytes) - * @param[in] input_step_x input_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in - * bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source - * image - * @param[out] output_ptr Pointer to the destination image. Supported data - * types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension - * (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X - * processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension - * (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y - * processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in - * bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z - * processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in - * bytes) - * @param[in] output_step_w output_stride_w * number of elements along W - * processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the - * destination image - */ -__kernel void space_to_depth_nhwc(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output)) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, Z_IN); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0); - - int out_index[4] = {0}; - int in_index[4] = {0}; - - in_index[0] = get_global_id(0); // C - in_index[1] = get_global_id(1); // W - in_index[2] = get_global_id(2) % Z_IN; // H - in_index[3] = get_global_id(2) / Z_IN; // B - - out_index[0] = - in_index[0] + ((in_index[2] % BLOCK_SIZE) * BLOCK_SIZE + in_index[1] % BLOCK_SIZE) * DEPTH_IN; - out_index[1] = in_index[1] / BLOCK_SIZE; - out_index[2] = in_index[2] / BLOCK_SIZE; - out_index[3] = in_index[3]; - - *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0], out_index[1], out_index[2], - out_index[3])) = *((__global DATA_TYPE *)in.ptr); -} -#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN) diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp deleted file mode 100644 index 06eeb5b98..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t axis) -{ - TensorShape out_shape{input_shape}; - - out_shape.set(axis, 1); - - return out_shape; -} -} // namespace - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis, - ArgOperation /*op*/) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::S32, DataType::F32, DataType::U8, - DataType::QASYMM8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::S32); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape().num_dimensions() - 1) != - output->tensor_shape().num_dimensions(), - "Input's rank is not same with output"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0, - "Inputs are not broadcast compatible"); - - const TensorShape output_shape = inferOutputShape(input->tensor_shape(), axis); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(), - "output shape's size does not match axis"); - - const auto num_dimensions = input->tensor_shape().num_dimensions(); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= num_dimensions, "axis must be less than (input's rank)."); - return Status{}; -} - -} // namespace - -CLArgOperationKernel::CLArgOperationKernel() : _input(nullptr), _output(nullptr), _axis() {} - -void CLArgOperationKernel::configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, - ArgOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op)); - - _input = input; - _output = output; - _axis = axis; - - std::unique_ptr<ITensorInfo> output_info = output->info()->clone(); - output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), axis)); - - // Construct kernel and set op_code based on type of ArgOperation as specified by object op - std::string kernel_name = "arg_op"; - int op_code = 0; - if (op == ArgOperation::MAX) - { - op_code = 1; - } - else if (op == ArgOperation::MIN) - { - op_code = 2; - } - else - throw std::runtime_error("Operation not supported, yet"); - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2))); - build_opts.emplace("-DOP_CODE=" + support::cpp11::to_string(op_code)); - - // Create kernel - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output_info, Steps()); - - Coordinates coord; - coord.set_num_dimensions(output_info->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -Status CLArgOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const uint32_t axis, ArgOperation op) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op)); - - return Status{}; -} - -void CLArgOperationKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &shape_in = _input->info()->tensor_shape(); - - unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters - - _kernel.setArg<cl_int>(idx++, _axis); - _kernel.setArg<cl_int>(idx++, shape_in[_axis]); - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup input slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - // Copy output's shape in order to use for recovering at end of this method - const TensorShape shape_out = _output->info()->tensor_shape(); - _output->info()->set_tensor_shape(inferOutputShape(shape_in, _axis)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); - - // Recover output's shape of output tensor - _output->info()->set_tensor_shape(shape_out); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp index bb5556888..fbc76f5e1 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp @@ -43,6 +43,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibraryEx.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "support/StringSupport.h" using namespace arm_compute; diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp deleted file mode 100644 index 01ea655b4..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLCastKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -CLCastKernel::CLCastKernel() : _input(nullptr), _output(nullptr) {} - -void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataType input_subtype) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Set kernel build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option("-DDATA_TYPE_OUT=" + - get_cl_type_from_data_type(output->info()->data_type())); - build_opts.add_option( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - // Create kernel - if (is_data_type_quantized_asymmetric(input->info()->data_type())) - { - UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); - const float scale_in = qinfo.scale; - const int offset_in = qinfo.offset; - build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in)); - - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("cast_qasymm_in", build_opts.options())); - } - else if (is_data_type_quantized_asymmetric(output->info()->data_type())) - { - UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); - const float scale_in = qinfo.scale; - const float offset_in = qinfo.offset; - - build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in)); - - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("cast_qasymm_out", build_opts.options())); - } - else - { - build_opts.add_option_if(input_subtype == SubDataType::BOOL, "-DBOOL_INPUT"); - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("cast", build_opts.options())); - } - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure_internal(win); -} - -void CLCastKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - Window slice = collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } while (collapsed.slide_window_slice_3D(slice)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp deleted file mode 100644 index 389136817..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -// TODO Use this validation function -#if 0 -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const int32_t block_size) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size < 1, - "Block size should be greater than or equal to 1."); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(0) != input->dimension(0) * block_size, - "Output width should be equal to (Input width * block size)"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(1) != input->dimension(1) * block_size, - "Output height should be equal to (Input height * block size)"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) % (block_size * block_size) != 0, - "Input depth should be divisible by (block size * block size)"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - output->dimension(2) != input->dimension(2) / (block_size * block_size), - "Output depth should be equal to (Input depth / (block size * block size))"); - - return Status{}; -} -#endif -} // namespace - -CLDepthToSpaceKernel::CLDepthToSpaceKernel() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CLDepthToSpaceKernel::configure(const ICLTensor *input, ICLTensor *output, - const int32_t block_size) -{ - // TODO Add validation of data_layout - _input = input; - _output = output; - - // Set kernel build options - auto layout_out = output->info()->data_layout(); - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size)); - auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL); - auto depth = output->info()->dimension(index_depth); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(depth)); - build_opts.emplace("-DZ_OUT=" + support::cpp11::to_string(output->info()->tensor_shape().z())); - - // Create kernel - _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel( - "depth_to_space_" + lower_string(string_from_data_layout(layout_out)), build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLDepthToSpaceKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup input slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp index 79f5ce065..67aaf2db6 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp @@ -43,6 +43,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibraryEx.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "support/StringSupport.h" using namespace arm_compute; diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp deleted file mode 100644 index 235e8975d..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp +++ /dev/null @@ -1,372 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "support/ToolchainSupport.h" - -#include <cstddef> -#include <cstdint> -#include <tuple> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -namespace -{ -using ElementsProcessed = Steps; - -Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, - const ITensorInfo *output, const GEMMReshapeInfo &gemm_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4, - "The number of dimensions for the matrix A must be <= 4"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, - "The number of dimensions for the matrix B must be <= 3"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 2 && - gemm_info.reinterpret_input_as_3d(), - "The input1 tensor cannot have more than 2 dimensions if input0 " - "has to be reinterpreted as 3D"); - - const int m = gemm_info.m(); - const int n = gemm_info.n(); - const int k = gemm_info.k(); - - ARM_COMPUTE_UNUSED(m); - ARM_COMPUTE_UNUSED(n); - ARM_COMPUTE_UNUSED(k); - - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != static_cast<unsigned int>(k)); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != static_cast<unsigned int>(n)); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(1) != static_cast<unsigned int>(k)); - if (gemm_info.reinterpret_input_as_3d()) - { - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) != - static_cast<unsigned int>(m)); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != static_cast<unsigned int>(m)); - } - - if (output->total_size() != 0) - { - const TensorInfo tensor_info_output = - output->clone()->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, - ITensorInfo *output, - const GEMMReshapeInfo &gemm_info, - ElementsProcessed &num_elements_processed) -{ - unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; - unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0); - - Window win{}; - Window win_out{}; - bool window_changed = false; - - // In case both input and output have to be reinterpreted as 3D tensors, - // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. - if (reinterpret_input_as_3d == reinterpret_output_as_3d) - { - reinterpret_input_as_3d = false; - reinterpret_output_as_3d = false; - } - - // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output, - input0->clone() - ->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info)) - .set_data_type(DataType::S32)); - - TensorInfo tmp_info(*output); - - if (reinterpret_output_as_3d) - { - // Since the output tensor has to be reinterpreted as 3D and the execute window is based on a 2D - // GEMM, - // the window needs to be constructed on the 2D collapsed version of the tensor - TensorShape tmp_shape(output->tensor_shape()); - tmp_shape.collapse(2U, 1U); - tmp_info.set_tensor_shape(tmp_shape); - } - - // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor. num_elems_processed_per_iteration_x - // Note: if the dot product instruction is available, the 8x2 tile has to be used - num_elems_processed_per_iteration_x = 4; - num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4); - - // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor - // The only way to set properly the paddings, it is to set those explicitly through the - // AccessWindowStatic - const int m = reinterpret_input_as_3d ? input0->tensor_shape()[1] * input0->tensor_shape()[2] - : input0->tensor_shape()[1]; - const int bottom_pad = - (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) % - num_elems_processed_per_iteration_y; - - // Configure window - win = calculate_max_window( - tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - win_out = calculate_max_window( - *output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - - AccessWindowStatic input0_access(input0, 0, 0, input0->dimension(0), - input0->dimension(1) + bottom_pad); - AccessWindowStatic input1_access( - input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x), - input1->dimension(1)); - AccessWindowStatic output_access( - output, 0, 0, ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration_x), - output->dimension(1) + bottom_pad); - - window_changed = - update_window_and_padding(win, input0_access, - input1_access) || // window used by the execute_window_loop - update_window_and_padding( - win_out, - output_access); // window used to update the padding requirements of output tensor - - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output_access.set_valid_region(win_out, ValidRegion(coord, output->tensor_shape())); - - // Collapse along the Z direction - // This collapse needs to be here in order to tune the Z dimension of LWS - Window collapsed = win; - const unsigned int dimension_to_collapse = - std::min(static_cast<unsigned int>(output->num_dimensions()), 2u); - collapsed = win.collapse(win, dimension_to_collapse); - - Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, collapsed); -} -} // namespace - -CLGEMMLowpMatrixMultiplyKernelEx::CLGEMMLowpMatrixMultiplyKernelEx() - : _input0(nullptr), _input1(nullptr), _output(nullptr), _slide_matrix_b(true), - _reinterpret_input_as_3d(false), _reinterpret_output_as_3d(false) -{ -} - -void CLGEMMLowpMatrixMultiplyKernelEx::configure(const ICLTensor *input0, const ICLTensor *input1, - ICLTensor *output, - const GEMMReshapeInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); - - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input0->info(), input1->info(), output->info(), gemm_info)); - - _input0 = input0; - _input1 = input1; - _output = output; - _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0); - - // In case both input and output have to be reinterpreted as 3D tensors, - // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false. - if (_reinterpret_input_as_3d == _reinterpret_output_as_3d) - { - _reinterpret_input_as_3d = false; - _reinterpret_output_as_3d = false; - } - - // Check if we need to slide the matrix B - const unsigned int num_dimensions_input0 = _reinterpret_input_as_3d - ? _input0->info()->num_dimensions() - 1 - : _input0->info()->num_dimensions(); - _slide_matrix_b = (_input1->info()->num_dimensions() >= num_dimensions_input0); - - ElementsProcessed num_elements_processed{}; - - // Configure kernel window - auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), - gemm_info, num_elements_processed); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); - - // Create build options - std::string kernel_name(" "); - CLBuildOptions build_opts; - build_opts.add_option_if(_reinterpret_input_as_3d, "-DREINTERPRET_INPUT_AS_3D"); - build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D"); - build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d, - "-DHEIGHT_GEMM3D=" + - support::cpp11::to_string(output->info()->dimension(1))); - build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d, - "-DDEPTH_GEMM3D=" + - support::cpp11::to_string(output->info()->dimension(2))); - build_opts.add_option_if(!_slide_matrix_b, - "-DMATRIX_B_DEPTH=" + - support::cpp11::to_string(input1->info()->dimension(2))); - build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(input0->info()->dimension(0))); - build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_X=" + - support::cpp11::to_string(num_elements_processed.x())); - build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=" + - support::cpp11::to_string(num_elements_processed.y())); - - kernel_name = "gemmlowp_mm_midgard_ex"; - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options())); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += (_reinterpret_input_as_3d ? "3di_" : ""); - _config_id += (_reinterpret_output_as_3d ? "3do_" : ""); - _config_id += lower_string(string_from_data_type(input0->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); -} - -Status CLGEMMLowpMatrixMultiplyKernelEx::validate(const ITensorInfo *input0, - const ITensorInfo *input1, - const ITensorInfo *output, - const GEMMReshapeInfo &gemm_info) -{ - ElementsProcessed num_elements_processed{}; - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, gemm_info)); - ARM_COMPUTE_RETURN_ON_ERROR( - validate_and_configure_window(input0->clone().get(), input1->clone().get(), - output->clone().get(), gemm_info, num_elements_processed) - .first); - - return Status{}; -} - -void CLGEMMLowpMatrixMultiplyKernelEx::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - if (_input1->info()->num_dimensions() < 3) - { - // The stride_z for matrix B must be zero if we do not slice - ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0); - } - - Window slice = window.first_slice_window_3D(); - Window slice_matrix_b = slice; - - slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1)); - slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1)); - - if (_reinterpret_input_as_3d) - { - // Pass bottom paddings to the kernel if the input has to be reinterpreted as 3D tensor - const unsigned int idx0 = 3 * num_arguments_per_2D_tensor() + 3; - const unsigned int total_cross_plane_pad = - _input0->info()->padding().top + _input0->info()->padding().bottom; - _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad)); - } - - if (_reinterpret_output_as_3d) - { - // Pass bottom paddings to the kernel if the output has to be reinterpreted as 3D tensor - const unsigned int idx0 = - 3 * num_arguments_per_2D_tensor() + 3 + (_reinterpret_input_as_3d ? 1 : 0); - const unsigned int total_cross_plane_pad = - _output->info()->padding().top + _output->info()->padding().bottom; - _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad)); - } - - do - { - Window slice_b = slice; - // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A - // more than 2 - // This scenario can happen when the matrix multiplication is used to perform a convolution - // operation - if (!_slide_matrix_b) - { - slice_b = slice_matrix_b; - } - - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input0, slice); - add_2D_tensor_argument(idx, _input1, slice_b); - add_2D_tensor_argument(idx, _output, slice); - _kernel.setArg<cl_uint>(idx++, - static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2])); - _kernel.setArg<cl_uint>(idx++, - static_cast<unsigned int>(_input1->info()->strides_in_bytes()[2])); - _kernel.setArg<cl_uint>(idx++, - static_cast<unsigned int>(_output->info()->strides_in_bytes()[2])); - enqueue(queue, *this, slice, lws_hint()); - } while (window.slide_window_slice_3D(slice)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp index 3a25987d0..3bfe3e407 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp @@ -45,6 +45,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" #include "arm_compute/core/UtilsEx.h" +#include "support/StringSupport.h" using namespace arm_compute; diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp index 7fbdcdaa7..930e7c944 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp @@ -43,6 +43,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibraryEx.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "support/StringSupport.h" using namespace arm_compute; @@ -110,7 +111,7 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso _hits = hits; // Make _lookup_indices tensor - _lookup_indices = arm_compute::support::cpp14::make_unique<CLTensor>(); + _lookup_indices = support::cpp14::make_unique<CLTensor>(); _lookup_indices->allocator()->init( TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32)); _lookup_indices->allocator()->allocate(); diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp index b45f6bb24..61c14d271 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp @@ -48,7 +48,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" - +#include "support/StringSupport.h" #include "support/ToolchainSupport.h" namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp index d305896ea..6b27c9917 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp @@ -49,6 +49,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "support/StringSupport.h" using namespace arm_compute; diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp index 74f7b4158..643c8b110 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp @@ -43,6 +43,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibraryEx.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "support/StringSupport.h" using namespace arm_compute; diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp deleted file mode 100644 index 8910a7b80..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLPReLUKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_info(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output) -{ - const TensorShape &out_shape = - TensorShape::broadcast_shape(input->tensor_shape(), alpha->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, - DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(alpha, 1, DataType::F16, DataType::F32, - DataType::QASYMM8); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32, - DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - return Status{}; -} -} // namespace - -CLPReLUKernel::CLPReLUKernel() : _input(nullptr), _alpha(nullptr), _output(nullptr) {} - -void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, alpha); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_info(input->info(), alpha->info(), output->info())); - - _input = input; - _alpha = alpha; - _output = output; - - // Create kernel - std::string kernel_name = "prelu"; - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - if (is_data_type_quantized_asymmetric(input->info()->data_type())) - { - build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string( - input->info()->quantization_info().uniform().offset)); - build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string( - alpha->info()->quantization_info().uniform().offset)); - build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string( - output->info()->quantization_info().uniform().offset)); - build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string( - input->info()->quantization_info().uniform().scale)); - build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string( - alpha->info()->quantization_info().uniform().scale)); - build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string( - output->info()->quantization_info().uniform().scale)); - kernel_name += "_qasymm8"; - } - _kernel = - static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input->info(), *alpha->info()); - - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output->info(), out_shape); - - if (input->info()->data_type() == DataType::F16 && alpha->info()->data_type() == DataType::F16) - { - set_format_if_unknown(*output->info(), Format::F16); - } - else if (input->info()->data_type() == DataType::F32 || - alpha->info()->data_type() == DataType::F32) - { - set_format_if_unknown(*output->info(), Format::F32); - } - } - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input->info()); - Window win_input2 = win.broadcast_if_dimension_le_one(*alpha->info()); - - AccessWindowHorizontal input1_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(alpha->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); -} - -void CLPReLUKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input->info()->tensor_shape(); - const TensorShape &in_shape2 = _alpha->info()->tensor_shape(); - const TensorShape &out_shape = _output->info()->tensor_shape(); - - bool can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice_input1); - add_3D_tensor_argument(idx, _alpha, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLPReLUKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input->info()->dimension(0), _alpha->info()->dimension(0)); - const unsigned int border = - std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp index 2d551f654..1a7a18cfa 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp @@ -49,6 +49,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "support/StringSupport.h" namespace arm_compute { @@ -69,7 +70,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_fac // Output must always be initialized ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); return Status{}; diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp index a98318323..06c2579f2 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp @@ -43,6 +43,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibraryEx.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "support/StringSupport.h" using namespace arm_compute; namespace diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp index ff1904abd..8d8853c81 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp @@ -48,6 +48,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "support/StringSupport.h" #include <climits> diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp deleted file mode 100644 index 64fc0384e..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const int32_t block_size) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size < 1, - "Block size should be greater than or equal to 1."); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(3) != output->dimension(3), - "Input batch should be equal to Output batch"); - - auto layout_out = input->data_layout(); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); - - auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL); - auto index_height = get_data_layout_dimension_index(layout_out, DataLayoutDimension::HEIGHT); - auto index_width = get_data_layout_dimension_index(layout_out, DataLayoutDimension::WIDTH); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - input->dimension(index_depth) * block_size * block_size != output->dimension(index_depth), - "Output depth should be equal to (input depth * block size *block size)"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->dimension(index_width) % block_size) || - (input->dimension(index_height) % block_size), - "Input height and width should be divisible by block size"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - (output->dimension(index_width) != (input->dimension(index_width) / block_size)) || - (output->dimension(index_height) != (input->dimension(index_height) / block_size)), - "Output height and width should be equal to " - "input_height/blocksize and input_width/blocksize respectively"); - - return Status{}; -} - -} // namespace - -CLSpaceToDepthKernel::CLSpaceToDepthKernel() : _input(nullptr), _output(nullptr) {} - -void CLSpaceToDepthKernel::configure(const ICLTensor *input, ICLTensor *output, - const int32_t block_size) -{ - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size)); - - _input = input; - _output = output; - - // Set kernel build options - auto layout_out = input->info()->data_layout(); - std::set<std::string> build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size)); - auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL); - auto depth = input->info()->dimension(index_depth); - build_opts.emplace("-DDEPTH_IN=" + support::cpp11::to_string(depth)); - build_opts.emplace("-DZ_IN=" + support::cpp11::to_string(input->info()->tensor_shape().z())); - - // Create kernel - _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel( - "space_to_depth_" + lower_string(string_from_data_layout(layout_out)), build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLSpaceToDepthKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_out(slice_in); - slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_out.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_in); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); -} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp deleted file mode 100644 index 61999cbd4..000000000 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLValidate.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -using namespace arm_compute; - -CLTransposeConvLayerUpsampleKernel::CLTransposeConvLayerUpsampleKernel() - : _input(nullptr), _output(nullptr), _inner_border(), _info() -{ -} - -Status CLTransposeConvLayerUpsampleKernel::validate(const ITensorInfo *input, - const ITensorInfo *output, - const BorderSize &inner_border, - const PadStrideInfo &info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - - const DataLayout data_layout = input->data_layout(); - - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_w) == 0); - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0); - - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c)); - for (size_t i = 3; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); - } - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.right > info.stride().first - 1, - "inner_border_right must be smaller that stride_x"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.top > info.stride().second - 1, - "inner_border_top must be smaller that stride_y"); - - return Status{}; -} - -void CLTransposeConvLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output, - const BorderSize &inner_border, - const PadStrideInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - _input = input; - _output = output; - _inner_border = inner_border; - _info = info; - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayerUpsampleKernel::validate( - input->info(), output->info(), inner_border, info)); - - // Create kernel - CLBuildOptions build_opts; - build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - _kernel = static_cast<cl::Kernel>( - CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options())); - - constexpr unsigned int num_elems_processed_per_iteration = 1; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLTransposeConvLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const DataLayout data_layout = _input->info()->data_layout(); - - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - const int out_start_x = _info.pad_left(); - const int out_end_x = _output->info()->dimension(idx_w) - _inner_border.right - - _info.pad_right() + _info.stride().first - 1; - const int out_step_x = _info.stride().first; - - const int out_start_y = _inner_border.top + _info.pad_top(); - const int out_end_y = - _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1; - const int out_step_y = _info.stride().second; - - switch (data_layout) - { - case DataLayout::NCHW: - { - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - - Window slice_out = collapsed.first_slice_window_3D(); - slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x)); - slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y)); - - Window slice_in = collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice_in); - add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (collapsed.slide_window_slice_3D(slice_in) && - collapsed.slide_window_slice_3D(slice_out)); - break; - } - case DataLayout::NHWC: - { - // NOTE: not collapsing in NHWC - Window slice_out = window.first_slice_window_3D(); - slice_out.set(Window::DimY, Window::Dimension(out_start_x, out_end_x, out_step_x)); - slice_out.set(Window::DimZ, Window::Dimension(out_start_y, out_end_y, out_step_y)); - - Window slice_in = window.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice_in); - add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported data layout"); - } -} diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp deleted file mode 100644 index 648afb304..000000000 --- a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" - -#include <cstddef> -#include <cstdint> - -namespace arm_compute -{ -CPPUpsampleKernelEx::CPPUpsampleKernelEx() : _input(nullptr), _output(nullptr), _info() {} - -bool CPPUpsampleKernelEx::is_parallelisable() const { return false; } - -void CPPUpsampleKernelEx::configure(const ITensor *input, ITensor *output, - const PadStrideInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - _input = input; - _output = output; - _info = info; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - - // The CPPUpsampleKernelEx doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICPPKernel::configure(win); -} - -void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - - // Initialize _scaled_output buffer - const int width_scaled = _output->info()->dimension(0); - const int height_scaled = _output->info()->dimension(1); - const int stride_x = _info.stride().first; - const int stride_y = _info.stride().second; - const int start_x = _info.pad_left(); - const int start_y = _info.pad_top(); - const int end_y = height_scaled - _info.pad_bottom(); - const int end_x = width_scaled - _info.pad_top(); - const size_t element_size = _input->info()->element_size(); - - // The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset - const uint8_t fill_value = - _output->info()->data_type() == DataType::QASYMM8 - ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset) - : 0; - // Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte - // values in a buffer of uint8_ts - std::fill_n(_output->buffer(), _output->info()->total_size(), fill_value); - - // Create window - Window window_out(window); - window_out.set(Window::DimX, Window::Dimension(start_x, end_x, stride_x)); - window_out.set(Window::DimY, Window::Dimension(start_y, end_y, stride_y)); - - // Create iterators - Iterator in(_input, window); - Iterator out(_output, window_out); - - execute_window_loop( - window, [&](const Coordinates &) { memcpy(out.ptr(), in.ptr(), element_size); }, in, out); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp deleted file mode 100644 index fbb9dbca9..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp +++ /dev/null @@ -1,671 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/NECastKernel.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - SubDataType input_subtype) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, - DataType::QASYMM8, DataType::U32, - DataType::S32, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(input_subtype == SubDataType::BOOL && - input->data_type() != DataType::U8); - - if (output->tensor_shape().total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8, - DataType::QASYMM8, DataType::U32, - DataType::S32, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - // Configure kernel window - Window win = calculate_max_window(*input, Steps()); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32); - - // NECastKernel doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(ValidRegion(coord, output->tensor_shape())); - - return std::make_tuple(Status{}, win); -} - -typedef struct bool8x16 -{ - uint8x16_t val; -} bool8x16_t; - -static inline uint8x16_t vreinterpretq_u8_b8(bool8x16_t __a) { return (uint8x16_t)__a.val; } - -template <typename ToV, typename FromV> inline ToV vcast(const FromV &v) { return v; } -template <> inline uint8x16_t vcast(const bool8x16_t &v) -{ - const uint8x16_t vu8 = vreinterpretq_u8_b8(v); - const uint8x16_t zero_uint8x16 = vdupq_n_u8(0); - uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16); - return vshrq_n_u8(mask, 7); // true -> 1, false -> 0 -} - -template <> inline uint32x4x4_t vcast(const bool8x16_t &v) -{ - const uint8x16_t vu8 = vreinterpretq_u8_b8(v); - const uint8x16_t zero_uint8x16 = vdupq_n_u8(0); - uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16); - uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0 - - const uint32x4x4_t ret = {{ - vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb)))), - vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb)))), - vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb)))), - vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb)))), - }}; - - return ret; -} - -template <> inline int32x4x4_t vcast(const bool8x16_t &v) -{ - const uint8x16_t vu8 = vreinterpretq_u8_b8(v); - const uint8x16_t zero_uint8x16 = vdupq_n_u8(0); - uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16); - uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0 - - const int32x4x4_t ret = {{ - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb))))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb))))), - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb))))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb))))), - }}; - - return ret; -} - -template <> inline float32x4x4_t vcast(const bool8x16_t &v) -{ - const uint8x16_t vu8 = vreinterpretq_u8_b8(v); - const uint8x16_t zero_uint8x16 = vdupq_n_u8(0); - uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16); - uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0 - - const float32x4x4_t ret = {{ - vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb))))), - vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb))))), - vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb))))), - vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb))))), - }}; - - return ret; -} - -template <> inline uint32x4x4_t vcast(const uint8x16_t &v) -{ - const uint32x4x4_t ret = {{ - vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v)))), - vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v)))), - vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v)))), - vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v)))), - }}; - - return ret; -} - -template <> inline int32x4x4_t vcast(const uint8x16_t &v) -{ - const int32x4x4_t ret = {{ - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v))))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v))))), - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v))))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v))))), - }}; - - return ret; -} - -template <> inline float32x4x4_t vcast(const uint8x16_t &v) -{ - const float32x4x4_t ret = {{ - vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v))))), - vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v))))), - vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v))))), - vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v))))), - }}; - - return ret; -} - -template <> inline uint8x16_t vcast(const int32x4x4_t &v) -{ - // Saturate cast - return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovun_s32(v.val[0]), vqmovun_s32(v.val[1]))), - vqmovn_u16(vcombine_u16(vqmovun_s32(v.val[2]), vqmovun_s32(v.val[3])))); -} - -template <> inline uint32x4x4_t vcast(const int32x4x4_t &v) -{ - // Saturate cast - const uint32x4x4_t ret = {{ - vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[0]))), - vqmovun_s64(vmovl_s32(vget_high_s32(v.val[0])))), - vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[1]))), - vqmovun_s64(vmovl_s32(vget_high_s32(v.val[1])))), - vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[2]))), - vqmovun_s64(vmovl_s32(vget_high_s32(v.val[2])))), - vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[3]))), - vqmovun_s64(vmovl_s32(vget_high_s32(v.val[3])))), - }}; - - return ret; -} - -template <> inline float32x4x4_t vcast(const int32x4x4_t &v) -{ - const float32x4x4_t ret = {{ - vcvtq_f32_s32(v.val[0]), vcvtq_f32_s32(v.val[1]), vcvtq_f32_s32(v.val[2]), - vcvtq_f32_s32(v.val[3]), - }}; - - return ret; -} - -template <> inline uint8x16_t vcast(const uint32x4x4_t &v) -{ - return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(v.val[0]), vqmovn_u32(v.val[1]))), - vqmovn_u16(vcombine_u16(vqmovn_u32(v.val[2]), vqmovn_u32(v.val[3])))); -} - -template <> inline int32x4x4_t vcast(const uint32x4x4_t &v) -{ - const int32x4x4_t ret = {{ - vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[0])))), - vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[0]))))), - vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[1])))), - vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[1]))))), - vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[2])))), - vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[2]))))), - vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[3])))), - vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[3]))))), - }}; - - return ret; -} - -template <> inline float32x4x4_t vcast(const uint32x4x4_t &v) -{ - const float32x4x4_t ret = {{ - vcvtq_f32_u32(v.val[0]), vcvtq_f32_u32(v.val[1]), vcvtq_f32_u32(v.val[2]), - vcvtq_f32_u32(v.val[3]), - }}; - - return ret; -} - -template <> inline uint8x16_t vcast(const float32x4x4_t &v) -{ - // Saturate cast - return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(v.val[0])), - vqmovun_s32(vcvtq_s32_f32(v.val[1])))), - vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(v.val[2])), - vqmovun_s32(vcvtq_s32_f32(v.val[3]))))); -} - -template <> inline uint32x4x4_t vcast(const float32x4x4_t &v) -{ - const uint32x4x4_t ret = {{ - vcvtq_u32_f32(v.val[0]), vcvtq_u32_f32(v.val[1]), vcvtq_u32_f32(v.val[2]), - vcvtq_u32_f32(v.val[3]), - }}; - - return ret; -} - -template <> inline int32x4x4_t vcast(const float32x4x4_t &v) -{ - const int32x4x4_t ret = {{ - vcvtq_s32_f32(v.val[0]), vcvtq_s32_f32(v.val[1]), vcvtq_s32_f32(v.val[2]), - vcvtq_s32_f32(v.val[3]), - }}; - - return ret; -} - -template <typename T> struct cast_vector; -template <> struct cast_vector<bool> -{ - using type = bool8x16_t; -}; -template <> struct cast_vector<uint8_t> -{ - using type = uint8x16_t; -}; -template <> struct cast_vector<uint32_t> -{ - using type = uint32x4x4_t; -}; -template <> struct cast_vector<int32_t> -{ - using type = int32x4x4_t; -}; -template <> struct cast_vector<float> -{ - using type = float32x4x4_t; -}; - -template <typename T> inline void store_result(T *ptr, const typename cast_vector<T>::type &v) -{ - wrapper::vstore(ptr, v.val[0]); - wrapper::vstore(ptr + 4, v.val[1]); - wrapper::vstore(ptr + 8, v.val[2]); - wrapper::vstore(ptr + 12, v.val[3]); -} - -template <> inline void store_result<uint8_t>(uint8_t *ptr, const uint8x16_t &v) -{ - wrapper::vstore(ptr, v); -} - -inline bool8x16_t vloadq(const bool *ptr) -{ - bool8x16_t ret; - ret.val = wrapper::vloadq(reinterpret_cast<const uint8_t *>(ptr)); - return ret; -} - -template <typename T> inline typename cast_vector<T>::type load_input(const T *ptr) -{ - return wrapper::vloadq(ptr); -} - -template <> inline typename cast_vector<bool>::type load_input(const bool *ptr) -{ - return vloadq(ptr); -} - -template <> inline typename cast_vector<uint32_t>::type load_input(const uint32_t *ptr) -{ - return vld4q_u32(ptr); -} - -template <> inline typename cast_vector<int32_t>::type load_input(const int32_t *ptr) -{ - return vld4q_s32(ptr); -} - -template <> inline typename cast_vector<float>::type load_input(const float *ptr) -{ - return vld4q_f32(ptr); -} - -template <typename T> inline T get_value(const T *ptr) { return *ptr; } - -template <> inline bool get_value(const bool *ptr) -{ - bool ret = (*ptr != 0); - return ret; -} - -template <typename FromT> void run_cast(const ITensor *input, ITensor *output, const Window &window) -{ - const int window_step_x = 16; - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - - // Collapse window and reset first dimension to handle tail calculations manually - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - // Create iterators - Iterator in(input, win_collapsed); - Iterator out(output, win_collapsed); - -#ifdef __aarch64__ - constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; -#else //__aarch64__ - constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO; -#endif //__aarch64__ - - execute_window_loop( - win_collapsed, - [&](const Coordinates &) { - const auto in_ptr = reinterpret_cast<const FromT *>(in.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - using from_vector = typename cast_vector<FromT>::type; - const from_vector vin = load_input(in_ptr + x); - - switch (output->info()->data_type()) - { - case DataType::U8: - { - using to_vector = typename cast_vector<uint8_t>::type; - const to_vector vout = vcast<to_vector, from_vector>(vin); - store_result<uint8_t>(reinterpret_cast<uint8_t *>(out.ptr()) + x, vout); - break; - } - case DataType::QASYMM8: - { - using to_vector = typename cast_vector<float>::type; - const UniformQuantizationInfo &qinfo_out = - output->info()->quantization_info().uniform(); - const auto vf = vcast<to_vector, from_vector>(vin); - const auto vout = vquantize(vf, qinfo_out); - store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout); - break; - } - case DataType::U32: - { - using to_vector = typename cast_vector<uint32_t>::type; - const to_vector vout = vcast<to_vector, from_vector>(vin); - store_result<uint32_t>(reinterpret_cast<uint32_t *>(out.ptr()) + x, vout); - break; - } - case DataType::S32: - { - using to_vector = typename cast_vector<int32_t>::type; - const to_vector vout = vcast<to_vector, from_vector>(vin); - store_result<int32_t>(reinterpret_cast<int32_t *>(out.ptr()) + x, vout); - break; - } - case DataType::F32: - { - using to_vector = typename cast_vector<float>::type; - const to_vector vout = vcast<to_vector, from_vector>(vin); - store_result<float>(reinterpret_cast<float *>(out.ptr()) + x, vout); - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - FromT val = get_value(in_ptr + x); - switch (output->info()->data_type()) - { - case DataType::U8: - { - *(reinterpret_cast<uint8_t *>(out.ptr()) + x) = static_cast<uint8_t>(val); - break; - } - case DataType::QASYMM8: - { - const QuantizationInfo &qinfo_out = output->info()->quantization_info(); - const auto qval = - quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy); - *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval; - break; - } - case DataType::U32: - { - *(reinterpret_cast<uint32_t *>(out.ptr()) + x) = static_cast<uint32_t>(val); - break; - } - case DataType::S32: - { - *(reinterpret_cast<int32_t *>(out.ptr()) + x) = static_cast<int32_t>(val); - break; - } - case DataType::F32: - { - *(reinterpret_cast<float *>(out.ptr()) + x) = static_cast<float>(val); - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } - } - }, - in, out); -} - -void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &window) -{ - const int window_step_x = 16; - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - - // Collapse window and reset first dimension to handle tail calculations manually - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - // Create iterators - Iterator in(input, win_collapsed); - Iterator out(output, win_collapsed); - -#ifdef __aarch64__ - constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; -#else //__aarch64__ - constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO; -#endif //__aarch64__ - const auto &qinfo_in = input->info()->quantization_info().uniform(); - const auto &qinfo_out = output->info()->quantization_info().uniform(); - - execute_window_loop( - win_collapsed, - [&](const Coordinates &) { - const auto in_ptr = reinterpret_cast<const qasymm8_t *>(in.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - using from_vector = typename cast_vector<float>::type; - const auto vf = wrapper::vloadq(in_ptr + x); - const auto vin = vdequantize(vf, qinfo_in); - switch (output->info()->data_type()) - { - case DataType::U8: - { - using to_vector = typename cast_vector<uint8_t>::type; - const to_vector vout = vcast<to_vector, from_vector>(vin); - store_result<uint8_t>(reinterpret_cast<uint8_t *>(out.ptr()) + x, vout); - break; - } - case DataType::QASYMM8: - { - using to_vector = typename cast_vector<float>::type; - const auto vf = vcast<to_vector, from_vector>(vin); - const auto vout = vquantize(vf, qinfo_out); - store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout); - break; - } - case DataType::U32: - { - using to_vector = typename cast_vector<uint32_t>::type; - const to_vector vout = vcast<to_vector, from_vector>(vin); - store_result<uint32_t>(reinterpret_cast<uint32_t *>(out.ptr()) + x, vout); - break; - } - case DataType::S32: - { - using to_vector = typename cast_vector<int32_t>::type; - const to_vector vout = vcast<to_vector, from_vector>(vin); - store_result<int32_t>(reinterpret_cast<int32_t *>(out.ptr()) + x, vout); - break; - } - case DataType::F32: - { - using to_vector = typename cast_vector<float>::type; - const to_vector vout = vcast<to_vector, from_vector>(vin); - store_result<float>(reinterpret_cast<float *>(out.ptr()) + x, vout); - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - qasymm8_t qval_in = *(in_ptr + x); - const auto val = dequantize_qasymm8(qval_in, qinfo_in); - - switch (output->info()->data_type()) - { - case DataType::U8: - { - *(reinterpret_cast<uint8_t *>(out.ptr()) + x) = static_cast<uint8_t>(val); - break; - } - case DataType::QASYMM8: - { - const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy); - *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out; - break; - } - case DataType::U32: - { - *(reinterpret_cast<uint32_t *>(out.ptr()) + x) = static_cast<uint32_t>(val); - break; - } - case DataType::S32: - { - *(reinterpret_cast<int32_t *>(out.ptr()) + x) = static_cast<int32_t>(val); - break; - } - case DataType::F32: - { - *(reinterpret_cast<float *>(out.ptr()) + x) = static_cast<float>(val); - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } - } - }, - in, out); -} -} // namespace - -NECastKernel::NECastKernel() : _input(nullptr), _output(nullptr), _input_subtype(SubDataType::NONE) -{ -} - -void NECastKernel::configure(const ITensor *input, ITensor *output, SubDataType input_subtype) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), input_subtype)); - - _input = input; - _output = output; - _input_subtype = input_subtype; - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - - INEKernel::configure(std::get<1>(win_config)); -} - -Status NECastKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - SubDataType input_subtype) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, input_subtype)); - ARM_COMPUTE_RETURN_ON_ERROR( - std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); - return Status{}; -} - -void NECastKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - switch (_input->info()->data_type()) - { - case DataType::U8: - if (_input_subtype == SubDataType::BOOL) - { - run_cast<bool>(_input, _output, window); - } - else - { - run_cast<uint8_t>(_input, _output, window); - } - break; - case DataType::QASYMM8: - run_cast_qasymm8(_input, _output, window); - break; - case DataType::U32: - run_cast<uint32_t>(_input, _output, window); - break; - case DataType::S32: - run_cast<int32_t>(_input, _output, window); - break; - case DataType::F32: - run_cast<float>(_input, _output, window); - break; - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp deleted file mode 100644 index 95e269dee..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include <arm_neon.h> -#include <cstdint> - -using namespace arm_compute::misc::shape_calculator; - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 2); - - const DataLayout data_layout = input->data_layout(); - const int idx_channel = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] % (block_shape * block_shape) != - 0); - // Validate output if initialized - if (output->total_size() != 0) - { - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] != - (block_shape * input->tensor_shape()[idx_width])); - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_height] != - (block_shape * input->tensor_shape()[idx_height])); - ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - return Status{}; -} -} // namespace - -NEDepthToSpaceLayerKernelEx::NEDepthToSpaceLayerKernelEx() - : _input(nullptr), _output(nullptr), _block_shape() -{ -} - -void NEDepthToSpaceLayerKernelEx::configure(const ITensor *input, ITensor *output, - int32_t block_shape) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - TensorShape output_shape = compute_depth_to_space_shape_ex(input->info(), block_shape); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_shape)); - - _input = input; - _output = output; - _block_shape = block_shape; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - ICPPKernel::configure(win); -} - -Status NEDepthToSpaceLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output, - int32_t block_shape) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, block_shape)); - return Status{}; -} - -void NEDepthToSpaceLayerKernelEx::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - - const int idx_channel = - get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::CHANNEL); - const int depth_size = _input->info()->dimension(idx_channel); - const int r = (depth_size / (_block_shape * _block_shape)); - const int element_size = _input->info()->element_size(); - - Window slice_out = window.first_slice_window_3D(); - - // The slice_out slice does not move - slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); - - // Main loop for NCHW and NHWC - if (_input->info()->data_layout() == DataLayout::NCHW) - { - Window slice_in = window.first_slice_window_2D(); - do - { - Iterator in(_input, slice_in); - execute_window_loop(slice_in, - [&](const Coordinates &id) { - const int x = id.x(); - const int y = id.y(); - - const int z = id.z() % r; - const int out_x = x * _block_shape + (id.z() / r) % _block_shape; - const int out_y = y * _block_shape + (id.z() / r) / _block_shape; - Coordinates output_coords{out_x, out_y, z, id[3]}; - memcpy(_output->ptr_to_element(output_coords), in.ptr(), element_size); - }, - in); - } while (window.slide_window_slice_2D(slice_in)); - } - else - { - Window slice_in = window.first_slice_window_3D(); - do - { - Iterator in(_input, slice_in); - execute_window_loop(slice_in, - [&](const Coordinates &id) { - const int x = id.y(); - const int y = id.z(); - - const int z = id.x() % r; - const int out_x = x * _block_shape + (id.x() / r) % _block_shape; - const int out_y = y * _block_shape + (id.x() / r) / _block_shape; - Coordinates output_coords{z, out_x, out_y, id[3]}; - memcpy(_output->ptr_to_element(output_coords), in.ptr(), element_size); - }, - in); - } while (window.slide_window_slice_3D(slice_in)); - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp deleted file mode 100644 index 200fc4f87..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" - -#include <algorithm> -#include <arm_neon.h> -#include <cstdint> -#include <map> -#include <string> - -namespace arm_compute -{ -class Coordinates; - -namespace -{ -template <ElementWiseUnaryEx op, typename ScalarType> -inline ScalarType elementwise_op_scalar(const ScalarType &a) -{ - switch (op) - { - case ElementWiseUnaryEx::NEG: - return -a; - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } -} - -template <ElementWiseUnaryEx op, typename VectorType> -inline VectorType elementwise_op(const VectorType &a) -{ - switch (op) - { - case ElementWiseUnaryEx::NEG: - return wrapper::vneg(a); - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } -} - -template <ElementWiseUnaryEx op, typename ScalarType> -void elementwise_op(const ITensor *in, ITensor *out, const Window &window) -{ - const int window_step_x = 16 / sizeof(ScalarType); - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - - Window win = window; - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(in, win); - Iterator output(out, win); - - execute_window_loop(win, - [&](const Coordinates &) { - auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr()); - const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr()); - - int x = window_start_x; - for (; x <= window_end_x - window_step_x; x += window_step_x) - { - wrapper::vstore(output_ptr + x, - elementwise_op<op>(wrapper::vloadq(input_ptr + x))); - } - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = elementwise_op_scalar<op>(*(input_ptr + x)); - } - }, - input, output); -} - -template <ElementWiseUnaryEx op> -std::function<void(const ITensor *input, ITensor *output, const Window &window)> -configure_func(const ITensor *input, ITensor *output) -{ - std::string function_to_call("op_"); - function_to_call += string_from_data_type(input->info()->data_type()) + "_"; - function_to_call += string_from_data_type(output->info()->data_type()); - - static std::map<std::string, NEElementwiseUnaryKernelEx::ElementwiseUnaryFunction *> - map_function = { - {"op_F32_F32", &elementwise_op<op, float>}, {"op_S32_S32", &elementwise_op<op, int32_t>}, - }; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - map_function["op_F16_F16"] = &elementwise_op<op, float16_t>; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - - auto it = map_function.find(function_to_call); - - if (it != map_function.end()) - { - auto func = it->second; - return [func](const ITensor *input, ITensor *output, const Window &window) { - func(input, output, window); - }; - } - return nullptr; -} -} // namespace - -NEElementwiseUnaryKernelEx::NEElementwiseUnaryKernelEx() - : _function(nullptr), _input(nullptr), _output(nullptr) -{ -} - -void NEElementwiseUnaryKernelEx::configure(ElementWiseUnaryEx op, const ITensor *input, - ITensor *output) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *output->info())); - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Configure kernel window - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input->info()); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type()); - - Window win = calculate_max_window(valid_region); - - _input = input; - _output = output; - - INEKernel::configure(win); - - switch (op) - { - case ElementWiseUnaryEx::NEG: - _function = configure_func<ElementWiseUnaryEx::NEG>(input, output); - break; - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } -} - -Status NEElementwiseUnaryKernelEx::validate_arguments(const ITensorInfo &input, - const ITensorInfo &output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32, - DataType::S32); - - // Validate in case of configured output - if (output.total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output); - } - - return Status{}; -} - -Status NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx op, const ITensorInfo *input, - const ITensorInfo *output) -{ - ARM_COMPUTE_UNUSED(op); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *output)); - return Status{}; -} - -void NEElementwiseUnaryKernelEx::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_function == nullptr); - _function(_input, _output, window); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp deleted file mode 100644 index 641641b5a..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h" - -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEAsymm.h" -#include "arm_compute/core/NEON/NEElementwiseOperationFuncs.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" - -#include <arm_neon.h> - -using namespace arm_compute; -namespace -{ - -/** Conditional element-wise operations */ -enum class ConditionalOperation -{ - PRELU, /**< (x * y) for x < 0, x for x >= 0 */ -}; - -template <ConditionalOperation op, typename ScalarType> -inline ScalarType elementwise_conditional_op_scalar(const ScalarType &a, const ScalarType &b) -{ - auto res = ScalarType(0); - - switch (op) - { - case ConditionalOperation::PRELU: - res = a < 0 ? a * b : a; - break; - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } - return res; -} - -template <ConditionalOperation op> -inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b, - QuantizationInfo qinfo) -{ - return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo, - RoundingPolicy::TO_NEAREST_UP); -} - -template <ConditionalOperation op, typename VectorType> -inline VectorType elementwise_conditional_op(const VectorType &a, const VectorType &b) -{ - VectorType res = {0, 0, 0, 0}; - VectorType const_0 = {0, 0, 0, 0}; - - switch (op) - { - case ConditionalOperation::PRELU: - res = wrapper::vbsl(wrapper::vcgt(a, const_0), a, wrapper::vmul(a, b)); - ; - break; - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } - return res; -} - -template <ConditionalOperation op> -inline float32x4x4_t elementwise_conditional_op(const float32x4x4_t &a, const float32x4x4_t &b) -{ - float32x4x4_t out = {{ - elementwise_conditional_op<op>(a.val[0], b.val[0]), - elementwise_conditional_op<op>(a.val[1], b.val[1]), - elementwise_conditional_op<op>(a.val[2], b.val[2]), - elementwise_conditional_op<op>(a.val[3], b.val[3]), - }}; - return out; -} - -template <ConditionalOperation op, typename ScalarType, typename VectorType> -inline VectorType elementwise_conditional_op_broadcast(const VectorType &a, - const ScalarType &broadcast_value, - const bool reorder) -{ - VectorType broadcast_vector = wrapper::vdup_n(broadcast_value, wrapper::traits::vector_128_tag()); - return elementwise_conditional_op<op>(reorder ? broadcast_vector : a, - reorder ? a : broadcast_vector); -} - -template <ConditionalOperation op, typename ScalarType, typename VectorType> -inline int elementwise_conditional_op_loop(int window_start_x, int window_end_x, int window_step_x, - const ScalarType *input1_ptr, - const ScalarType *input2_ptr, ScalarType *output_ptr) -{ - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto a = wrapper::vloadq(input1_ptr + x); - const auto b = wrapper::vloadq(input2_ptr + x); - wrapper::vstore(output_ptr + x, elementwise_conditional_op<op>(a, b)); - } - return x; -} - -template <ConditionalOperation op> -inline int elementwise_conditional_op_quantized_loop(int window_start_x, int window_end_x, - int window_step_x, const uint8_t *input1_ptr, - const uint8_t *input2_ptr, uint8_t *output_ptr, - int32x4_t voffset1, int32x4_t voffset2, - float32x4_t vscale1, float32x4_t vscale2, - float32x4_t voffseto, float32x4_t invvscaleo) -{ - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - // Get inputs and compute output - const float32x4x4_t af = load_quantized(input1_ptr + x, voffset1, vscale1); - const float32x4x4_t bf = load_quantized(input2_ptr + x, voffset2, vscale2); - const float32x4x4_t rf = elementwise_conditional_op<op>(af, bf); - store_quantized(output_ptr + x, rf, voffseto, invvscaleo); - } - return x; -} - -template <ConditionalOperation op, typename ScalarType, typename VectorType> -inline int elementwise_conditional_op_broadcast_loop(int window_start_x, int window_end_x, - int window_step_x, - const ScalarType *non_broadcast_input_ptr, - const ScalarType &broadcast_value, - ScalarType *output_ptr, const bool reorder) -{ - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto a = wrapper::vloadq((non_broadcast_input_ptr + x)); - wrapper::vstore(output_ptr + x, - elementwise_conditional_op_broadcast<op>(a, broadcast_value, reorder)); - } - return x; -} - -template <ConditionalOperation op> -inline int elementwise_conditional_op_quantized_broadcast_loop( - int window_start_x, int window_end_x, int window_step_x, const uint8_t *non_broadcast_input_ptr, - float32x4x4_t broadcast_vector, uint8_t *output_ptr, int32x4_t voffset_non_broadcast, - float32x4_t vscale_non_broadcast, float32x4_t voffseto, float32x4_t invvscaleo, bool reorder) -{ - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const float32x4x4_t af = - load_quantized(non_broadcast_input_ptr + x, voffset_non_broadcast, vscale_non_broadcast); - const float32x4x4_t rf = elementwise_conditional_op<op>(reorder ? broadcast_vector : af, - reorder ? af : broadcast_vector); - store_quantized(output_ptr + x, rf, voffseto, invvscaleo); - } - return x; -} - -template <ConditionalOperation op, typename ScalarType, typename VectorType> -void elementwise_conditional_op(const ITensor *in1, const ITensor *in2, ITensor *out, - const Window &window) -{ - elementwise_op(in1, in2, out, window, &elementwise_conditional_op_scalar<op, ScalarType>, - &elementwise_conditional_op_broadcast_loop<op, ScalarType, VectorType>, - &elementwise_conditional_op_loop<op, ScalarType, VectorType>); -} - -template <ConditionalOperation op> -void elementwise_conditional_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, - const Window &window) -{ - elementwise_op_quantized(in1, in2, out, window, &elementwise_conditional_op_quantized_scalar<op>, - &elementwise_conditional_op_quantized_broadcast_loop<op>, - &elementwise_conditional_op_quantized_loop<op>); -} -} // namespace - -NEPReLUKernel::NEPReLUKernel() : _input(nullptr), _alpha(nullptr), _output(nullptr) {} - -void NEPReLUKernel::configure(const ITensor *input, const ITensor *alpha, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, alpha, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *alpha->info(), *output->info())); - - // Configure kernel window - const std::pair<TensorShape, ValidRegion> broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input->info(), *alpha->info()); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type()); - - Window win = calculate_max_window(valid_region); - - _input = input; - _alpha = alpha; - _output = output; - INEKernel::configure(win); -} - -void NEPReLUKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - if (_input->info()->data_type() == DataType::F32) - { - elementwise_conditional_op<ConditionalOperation::PRELU, float, float32x4_t>(_input, _alpha, - _output, window); - } - else if (_input->info()->data_type() == DataType::QASYMM8) - { - elementwise_conditional_op_quantized<ConditionalOperation::PRELU>(_input, _alpha, _output, - window); - } - else - { - ARM_COMPUTE_ERROR("Wrong Type"); - } -} - -Status NEPReLUKernel::validate_arguments(const ITensorInfo &input, const ITensorInfo &alpha, - const ITensorInfo &output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &alpha, &output); - - const TensorShape out_shape = - TensorShape::broadcast_shape(input.tensor_shape(), alpha.tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - - // Checks performed when output is configured - if (output.total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output.tensor_shape(), 0), - "Wrong shape for output"); - } - - return Status{}; -} - -Status NEPReLUKernel::validate(const ITensorInfo *input, const ITensorInfo *alpha, - const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, alpha, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *alpha, *output)); - - return Status{}; -} diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp index 6ba0f1fd4..5841f1d69 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp @@ -64,7 +64,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16, DataType::F32); diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp deleted file mode 100644 index 44feb200f..000000000 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include <arm_neon.h> -#include <cstdint> - -using namespace arm_compute::misc::shape_calculator; - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - - ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 1); - - // Validate output if initialized - if (output->total_size() != 0) - { - const DataLayout data_layout = input->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_channel = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - const int idx_batch = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_width] % block_shape != 0); - ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_height] % block_shape != 0); - ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_batch] != - output->tensor_shape()[idx_batch]); - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_channel] % (block_shape * block_shape) != - 0); - ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size() != - output->tensor_shape().total_size()); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - return Status{}; -} -} // namespace - -NESpaceToDepthLayerKernelEx::NESpaceToDepthLayerKernelEx() - : _input(nullptr), _output(nullptr), _block_shape() -{ -} - -void NESpaceToDepthLayerKernelEx::configure(const ITensor *input, ITensor *output, - int32_t block_shape) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - TensorShape output_shape = compute_space_to_depth_shape_ex(input->info(), block_shape); - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_shape)); - - _input = input; - _block_shape = block_shape; - _output = output; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - INEKernel::configure(win); -} - -Status NESpaceToDepthLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output, - int32_t block_shape) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, block_shape)); - return Status{}; -} - -void NESpaceToDepthLayerKernelEx::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - - const DataLayout data_layout = _input->info()->data_layout(); - const int channel_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - const int element_size = _input->info()->element_size(); - - const size_t channel_size = _input->info()->dimension(channel_idx); - - Window slice_out = window.first_slice_window_3D(); - - int batch_id = 0; - - // Main loop for NCHW and NHWC - if (_output->info()->data_layout() == DataLayout::NCHW) - { - do - { - Iterator out(_output, slice_out); - execute_window_loop(slice_out, - [&](const Coordinates &id) { - const size_t channel_id = id.z(); - const size_t in_x = - id.x() * _block_shape + (channel_id / channel_size) % _block_shape; - const size_t in_y = - id.y() * _block_shape + (channel_id / channel_size) / _block_shape; - const int z = channel_id % channel_size; - Coordinates input_coords{in_x, in_y, z, batch_id}; - memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size); - }, - out); - ++batch_id; - } while (window.slide_window_slice_3D(slice_out)); - } - else - { - do - { - Iterator out(_output, slice_out); - execute_window_loop(slice_out, - [&](const Coordinates &id) { - const size_t channel_id = id.x(); - const size_t in_x = - id.y() * _block_shape + (channel_id / channel_size) % _block_shape; - const size_t in_y = - id.z() * _block_shape + (channel_id / channel_size) / _block_shape; - const int z = channel_id % channel_size; - Coordinates input_coords{z, in_x, in_y, batch_id}; - memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size); - }, - out); - ++batch_id; - } while (window.slide_window_slice_3D(slice_out)); - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp deleted file mode 100644 index 2d379cf36..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLArgOperation.h" - -#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -namespace arm_compute -{ - -CLArgOperation::CLArgOperation() -{ - // DO NOTHING -} - -void CLArgOperation::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, - ArgOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), axis, output->info(), op)); - _input = input; - _output = output; - _axis = axis; - _arg_op = op; - // NOTE The argminmax_axis must have no duplication. - _num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = _num_of_kernels - 1; - - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _argop_kernels = - arm_compute::support::cpp14::make_unique<CLArgOperationKernel[]>(_num_of_kernels); - - TensorShape shape{input->info()->tensor_shape()}; - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(_axis[i], 1); - _interm_tensors[i].allocator()->init( - TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()) - .set_data_layout(input->info()->data_layout())); - _interm_tensors[i].allocator()->allocate(); - } - - // Set a vector that is ordered ICLTensors sequentially. - std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ArgMinMax on all kernels - for (size_t i = 0; i < _num_of_kernels; i++) - { - _argop_kernels[i].configure(tensors[i], tensors[i + 1], _axis[i], op); - } -} - -Status CLArgOperation::validate(const ITensorInfo *input, const std::vector<uint32_t> &axis, - const ITensorInfo *output, ArgOperation op) -{ - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(axis[i], 1); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - } - - // Set a vector that is ordered ITensorInfo sequentially. - std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate argminmax only on all kernels - for (size_t i = 0; i < num_of_kernels; i++) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLArgOperationKernel::validate(tensors[i], tensors[i + 1], axis[i], op)); - } - - return Status{}; -} - -void CLArgOperation::run() -{ - for (size_t i = 0; i < _num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_argop_kernels[i]); - } -} - -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp index 92ee69a36..e5122ab8f 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp @@ -48,7 +48,7 @@ using namespace arm_compute; void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, BinaryLogicalOperation op) { - auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); + auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); k->configure(input1, input2, output, op); _kernel = std::move(k); diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp deleted file mode 100644 index b3118f39e..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLCast.h" - -#include "arm_compute/core/CL/kernels/CLCastKernel.h" - -using namespace arm_compute; - -void CLCast::configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype) -{ - auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>(); - k->configure(input, output, input_subtype); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp deleted file mode 100644 index db662505a..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h" - -#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" - -using namespace arm_compute; - -void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp new file mode 100644 index 000000000..3dede0562 --- /dev/null +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2019-2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/UtilsEx.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include <memory> +#include <tuple> + +namespace arm_compute +{ +using namespace arm_compute::misc::shape_calculator; + +CLDirectTransposeConvLayer::CLDirectTransposeConvLayer( + std::shared_ptr<IMemoryManager> memory_manager) // NOLINT + : _memory_group(std::move(memory_manager)), + _scale_f(), + _conv_f(), + _flip_weights(), + _scaled_output(), + _original_weights(nullptr), + _weights_flipped(), + _flip_axis(), + _is_prepared(false) +{ +} + +Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *bias, ITensorInfo *output, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_bottom, + const WeightsInfo &weights_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( + input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); + const DataLayout data_layout = input->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); + + auto out_dims = transposeconv_output_dimensions( + input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), + weights->dimension(idx_h), info, invalid_right, invalid_bottom); + + const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); + + if (bias != nullptr) + { + if (is_data_type_quantized_asymmetric(input->data_type())) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + } + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias); + } + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w], + "Output's width is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h], + "Output's height is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], + "Output's depth is invalid."); + + unsigned int pad_left = 0; + unsigned int pad_right = 0; + unsigned int pad_top = 0; + unsigned int pad_bottom = 0; + const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( + *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, + pad_bottom); + TensorInfo scale_out_info(input->clone() + ->set_is_resizable(true) + .reset_padding() + .set_tensor_shape(scale_out_shape) + .set_data_layout(data_layout)); + const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); + + ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, + conv_info, weights_info)); + + return Status{}; +} + +void CLDirectTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_bottom, + const WeightsInfo &weights_info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, info, + invalid_right, invalid_bottom, weights_info); +} + +void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_context, + ICLTensor *input, ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &info, unsigned int invalid_right, + unsigned int invalid_bottom, + const WeightsInfo &weights_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + + unsigned int pad_left = 0; + unsigned int pad_right = 0; + unsigned int pad_top = 0; + unsigned int pad_bottom = 0; + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; + + const DataLayout data_layout = input->info()->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + _original_weights = weights; + _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); + _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); + _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis); + + auto out_dims = transposeconv_output_dimensions( + input->info()->dimension(idx_w), input->info()->dimension(idx_h), + weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right, + invalid_bottom); + + const TensorShape output_shape = + compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); + + // Output auto initialization if not yet initialized + auto_init_if_empty( + *output->info(), + input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); + + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate( + input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), + info, invalid_right, invalid_bottom)); + + _is_prepared = weights_info.retain_internal_weights(); + + _memory_group.manage(&_scaled_output); + + // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order + // to match output shape + const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( + *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, + pad_right, pad_top, pad_bottom); + + TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), + input->info()->quantization_info()); + scale_out_info.set_data_layout(data_layout); + _scaled_output.allocator()->init(scale_out_info); + + // configure scale function + const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, + DimensionRoundingType::FLOOR); + _scale_f.configure(input, &_scaled_output, upsample_info); + + // Setup the function to convolve the upscaled output + const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); + _conv_f.configure(compile_context, &_scaled_output, &_weights_flipped, bias, output, conv_info, + weights_info); + _scaled_output.allocator()->allocate(); + + // Setup flip axis data + _flip_axis.allocator()->allocate(); + _flip_axis.map(true); + auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer()); + if (weights->info()->data_layout() == DataLayout::NHWC) + { + axis_data[0] = 1; + axis_data[1] = 2; + } + else + { + axis_data[0] = 0; + axis_data[1] = 1; + } + _flip_axis.unmap(); +} + +void CLDirectTransposeConvLayer::run() +{ + prepare(); + + MemoryGroupResourceScope scope_mg(_memory_group); + + _scale_f.run(); + _conv_f.run(); +} + +void CLDirectTransposeConvLayer::prepare() +{ + if (!_is_prepared) + { + ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); + + // Run weights flipping and mark original weights tensor as unused + _weights_flipped.allocator()->allocate(); + _flip_weights.run(); + _original_weights->mark_as_unused(); + + // Prepare convolution + _conv_f.prepare(); + + // Free flipped weights + if (!_weights_flipped.is_used()) + { + _weights_flipped.allocator()->free(); + } + + _is_prepared = true; + } +} +} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp index 3d9a28a48..ae9d8afc6 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp @@ -47,7 +47,7 @@ using namespace arm_compute; void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups) { - auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>(); + auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>(); k->configure(input, output, lookups); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp index f098832b0..01989461e 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp @@ -45,7 +45,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" #include <algorithm> @@ -60,7 +60,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I ARM_COMPUTE_UNUSED(weights); ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output)); + CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); return Status{}; } @@ -68,7 +68,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>(); + auto k = support::cpp14::make_unique<CLTransposeKernel>(); k->configure(input, output); _kernel = std::move(k); } @@ -172,7 +172,8 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen // Quantize input _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); _memory_group.manage(&_quantized_input); _quant_input_kernel.configure(input, &_scale_factor, &_quantized_input); @@ -199,7 +200,7 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); @@ -256,8 +257,9 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor)); // Validate quantization symm8 kernel - const ITensorInfo &quantized_input = TensorInfo( - input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + const ITensorInfo &quantized_input = + TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); ARM_COMPUTE_RETURN_ON_ERROR( CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input)); diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp index 63e291b36..2ff4b9659 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp @@ -46,7 +46,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" #include <algorithm> @@ -141,7 +141,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output) { - auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>(); + auto k = support::cpp14::make_unique<CLTransposeKernel>(); k->configure(input, output); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp index 9aebc473e..157b4d977 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp @@ -53,18 +53,21 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp fc->configure(input_to_use, _weights, _biases, _output); return std::unique_ptr<arm_compute::IFunction>(fc); } - else + else if (kernel_type == KernelType::PREPROCESSED_WEIGHTS) { - assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS); - bool is_hybrid = (input->info()->data_type() == DataType::F32 || input->info()->data_type() == DataType::F16) && - weights->info()->data_type() == DataType::S8; + (weights->info()->data_type() == DataType::S8 || + weights->info()->data_type() == DataType::QASYMM8_SIGNED); if (is_hybrid) { auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager}; + ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info()); + const auto orgin_weights_data_type = weights_info->data_type(); + weights_info->set_data_type(DataType::QASYMM8_SIGNED); fc->configure(input_to_use, _weights, _biases, _output); + weights_info->set_data_type(orgin_weights_data_type); return std::unique_ptr<arm_compute::IFunction>(fc); } else @@ -74,6 +77,11 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp return std::unique_ptr<arm_compute::IFunction>(fc); } } + else + { + throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type"); + } + }(); if (_needs_reshape) diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp deleted file mode 100644 index ca5499dfc..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/MemoryGroup.h" - -namespace arm_compute -{ -using namespace arm_compute::misc::shape_calculator; -using namespace arm_compute::cl_gemm; - -namespace -{ -inline bool is_gemm_reshaped(bool reshape_b_only_on_first_run, GPUTarget gpu_target) -{ - return (get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) && (reshape_b_only_on_first_run); -} -} // namespace - -CLGEMMLowpMatrixMultiplyCoreEx::CLGEMMLowpMatrixMultiplyCoreEx( - std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _mm_midgard_kernel(), _mtx_a_reduction_kernel(), - _mtx_b_reduction_kernel(), _vector_sum_col(), _vector_sum_row(), _a_offset(0), _b_offset(0), - _reshape_b_only_on_first_run(false), _is_prepared(false) -{ -} - -void CLGEMMLowpMatrixMultiplyCoreEx::configure(const ICLTensor *a, const ICLTensor *b, - const ICLTensor *c, ICLTensor *output, - const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - ARM_COMPUTE_UNUSED(c); - ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCoreEx::validate( - a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info)); - - _is_prepared = false; - _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); - _a_offset = a->info()->quantization_info().uniform().offset; - _b_offset = b->info()->quantization_info().uniform().offset; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - - // Set the target for the kernels - _mm_midgard_kernel.set_target(gpu_target); - - // GEMMRHSMatrixInfo rhs_info; - // GEMMLHSMatrixInfo lhs_info; - - // Arguments used by GEMMReshapeInfo - // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, - // n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo - // in order to know how the matrices have been reshaped - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d - ? (a->info()->dimension(1) * a->info()->dimension(2)) - : a->info()->dimension(1); - const unsigned int n = b->info()->dimension(0); - const unsigned int k = a->info()->dimension(0); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - const ICLTensor *matrix_b = b; - // Configure matrix multiply kernel - _mm_midgard_kernel.configure( - a, matrix_b, output, - GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d)); -} - -Status CLGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b, - const ITensorInfo *c, const ITensorInfo *output, - const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b); - ARM_COMPUTE_UNUSED(c); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), - "Matrix A already reshaped is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), - "Matrix B already reshaped is not supported"); - - const ITensorInfo *matrix_a_info = a; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = - reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - bool reshape_matrix_b = is_gemm_reshaped(gemm_info.reshape_b_only_on_first_run(), gpu_target); - - const GEMMReshapeInfo reshape_info = - GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d); - - TensorInfo weights_info(*b); - const ITensorInfo *matrix_b_info = &weights_info; - if (reshape_matrix_b) - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(false, - "CLGEMMLowpMatrixMultiplyCoreEx does not support reshape_b"); - } - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernelEx::validate( - matrix_a_info, matrix_b_info, output, reshape_info)); - - return Status{}; -} - -void CLGEMMLowpMatrixMultiplyCoreEx::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Run matrix multiply - CLScheduler::get().enqueue(_mm_midgard_kernel, false); -} - -void CLGEMMLowpMatrixMultiplyCoreEx::prepare() -{ - if (!_is_prepared) - { - _is_prepared = true; - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp index f594d7a2e..e0b833b04 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp @@ -48,7 +48,7 @@ using namespace arm_compute; void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis) { - auto k = arm_compute::support::cpp14::make_unique<CLGatherExKernel>(); + auto k = support::cpp14::make_unique<CLGatherExKernel>(); k->configure(input, indices, output, axis); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp index 27ed8e828..65b89a389 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp @@ -47,7 +47,7 @@ using namespace arm_compute; void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input, ICLTensor *output, ICLTensor *hits) { - auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>(); + auto k = support::cpp14::make_unique<CLHashtableLookupKernel>(); k->configure(lookups, keys, input, output, hits); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp index 80393e8d1..5a7e40839 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp @@ -50,7 +50,7 @@ CLInstanceNormalizationLayerEx::CLInstanceNormalizationLayerEx() {} void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *gamma, ICLTensor *beta, float epsilon) { - auto k = arm_compute::support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>(); + auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>(); k->configure(input, output, gamma, beta, epsilon); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp deleted file mode 100644 index fbb15ab1d..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLPReLU.h" - -#include "arm_compute/core/CL/kernels/CLPReLUKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>(); - k->configure(input, alpha, output); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp deleted file mode 100644 index 6049b7e70..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLRNNLayerEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" - -#include <utility> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -CLRNNLayerEx::CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), - _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), - _gemm_output(), _add_output(), _is_prepared(false) -{ -} - -Status CLRNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *recurrent_weights, const ITensorInfo *bias, - const ITensorInfo *hidden_state, const ITensorInfo *output, - const ActivationLayerInfo &info) -{ - const int idx_width = 0; - const int idx_height = 1; - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, - output); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) != - recurrent_weights->dimension(idx_width)); - ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) != - recurrent_weights->dimension(1)); - ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1); - ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), - hidden_state->tensor_shape()); - - auto shape_info = - TensorInfo(compute_rnn_shape(recurrent_weights, hidden_state->dimension(idx_height)), 1, - input->data_type()); - - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info)); - ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f)); - ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate( - ArithmeticOperation::ADD, &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info)); - - return Status{}; -} - -void CLRNNLayerEx::configure(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *recurrent_weights, const ICLTensor *bias, - ICLTensor *hidden_state, ICLTensor *output, ActivationLayerInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output); - ARM_COMPUTE_ERROR_THROW_ON(CLRNNLayerEx::validate(input->info(), weights->info(), - recurrent_weights->info(), bias->info(), - hidden_state->info(), output->info(), info)); - - const int idx_height = 1; - TensorShape shape = - compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height)); - - _is_prepared = false; - - _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - - // Manage intermediate buffers and configure - _memory_group.manage(&_fully_connected_out); - _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out); - - _memory_group.manage(&_gemm_output); - _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f); - - _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - _memory_group.manage(&_add_output); - - _add_kernel.configure(ArithmeticOperation::ADD, &_fully_connected_out, &_gemm_output, - &_add_output, ConvertPolicy::SATURATE); - - _fully_connected_out.allocator()->allocate(); - _gemm_output.allocator()->allocate(); - - _activation_kernel.configure(&_add_output, hidden_state, info); - _add_output.allocator()->allocate(); - - _copy_kernel.configure(hidden_state, output); -} - -void CLRNNLayerEx::run() -{ - prepare(); - - _memory_group.acquire(); - - _fully_connected_kernel.run(); - _gemm_state_f.run(); - CLScheduler::get().enqueue(_add_kernel); - CLScheduler::get().enqueue(_activation_kernel); - - // copy hidden out to output - CLScheduler::get().enqueue(_copy_kernel); - - _memory_group.release(); -} - -void CLRNNLayerEx::prepare() -{ - if (!_is_prepared) - { - _fully_connected_kernel.prepare(); - _gemm_state_f.prepare(); - - _is_prepared = true; - } -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp index 8ce2d746c..a41e6db60 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp @@ -60,8 +60,7 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo * const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0); // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); + auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); // Create intermediate tensor info TensorShape shape{input->tensor_shape()}; @@ -119,9 +118,8 @@ void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output, const size_t num_of_kernels = axis.size(); const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0); - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _reduce_kernels = - arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); + _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); + _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); // Set a vector that is ordered ICLTensors sequentially. std::vector<ICLTensor *> tensors; diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp deleted file mode 100644 index 7d7b2264b..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h" - -#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" - -using namespace arm_compute; - -void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp index e61746ef2..3215d01a7 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,218 +37,124 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h" -#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" -#include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/UtilsEx.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CPP/CPPScheduler.h" +#include <cmath> #include <memory> #include <tuple> using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; -CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _scale_f(), - _conv_f(), - _flip_weights(), - _scaled_output(), - _original_weights(nullptr), - _weights_flipped(), - _is_prepared(false) +CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_manager(std::move(memory_manager)), _function() +{ +} + +void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, + ICLTensor *output, const PadStrideInfo &deconv_info, + unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info) { + configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info, + invalid_right, invalid_bottom, weights_info); +} + +void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, + ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + + switch (CLTransposeConvLayer::get_deconvolution_method(input->info(), weights->info(), nullptr, + output->info(), deconv_info, invalid_right, + invalid_bottom, weights_info)) + { + case DeconvolutionMethod::DIRECT: + { + auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>(); + f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right, + invalid_bottom, weights_info); + _function = std::move(f); + break; + } + case DeconvolutionMethod::GEMM: + { + auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager); + f->configure(compile_context, input, weights, bias, output, deconv_info); + _function = std::move(f); + break; + } + default: + ARM_COMPUTE_ERROR("Not supported."); + break; + } } Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, - const PadStrideInfo &info, unsigned int invalid_right, + const PadStrideInfo &deconv_info, unsigned int invalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); - - const DataLayout data_layout = input->data_layout(); - - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); - - const unsigned int kernel_x = weights->dimension(idx_w); - const unsigned int kernel_y = weights->dimension(idx_h); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_right > kernel_x - 1, - "invalid_right must be smaller than kernel_x"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_bottom > kernel_y - 1, - "inner_border_top must be smaller than kernel_y"); - - // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were added. - auto out_dims = transposeconv_output_dimensions( - input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), - weights->dimension(idx_h), info, invalid_right, invalid_bottom); - - const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); - - if (bias != nullptr) + switch (CLTransposeConvLayer::get_deconvolution_method( + input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)) { - if (is_data_type_quantized_asymmetric(input->data_type())) + case DeconvolutionMethod::DIRECT: { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); + // Validate direct convolution layer + ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate( + input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)); + break; } - else + case DeconvolutionMethod::GEMM: { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + // Validate gemm-based convolution layer + ARM_COMPUTE_RETURN_ON_ERROR( + CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info)); + break; } - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias); + default: + ARM_COMPUTE_ERROR("Not supported."); + break; } - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w], - "Output's width is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h], - "Output's height is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], - "Output's depth is invalid."); - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, - pad_bottom); - TensorInfo scale_out_info(input->clone() - ->set_is_resizable(true) - .reset_padding() - .set_tensor_shape(scale_out_shape) - .set_data_layout(data_layout)); - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - ARM_COMPUTE_RETURN_ON_ERROR( - CLTransposeConvLayerUpsample::validate(input, &scale_out_info, BorderSize(0, 0), info)); - ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, - conv_info, weights_info)); - return Status{}; } -void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, - ICLTensor *output, const PadStrideInfo &info, - unsigned int invalid_right, unsigned int invalid_bottom, - const WeightsInfo &weights_info) +DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method( + const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, + ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; + ARM_COMPUTE_UNUSED(output, bias, weights_info); - const DataLayout data_layout = input->info()->data_layout(); + const DataLayout data_layout = input->data_layout(); const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - _original_weights = weights; - _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(weights, &_weights_flipped); - - // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were - // added. - auto out_dims = transposeconv_output_dimensions( - input->info()->dimension(idx_w), input->info()->dimension(idx_h), - weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right, - invalid_bottom); - - const TensorShape output_shape = - compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); - - // Output auto initialization if not yet initialized - auto_init_if_empty( - *output->info(), - input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayer::validate( - input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); - - _is_prepared = weights_info.retain_internal_weights(); - - _memory_group.manage(&_scaled_output); - - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order - // to match output shape - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - scale_out_info.set_data_layout(data_layout); - _scaled_output.allocator()->init(scale_out_info); - - // configure scale function - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::FLOOR); - _scale_f.configure(input, &_scaled_output, BorderSize(0, 0), upsample_info); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, weights_info); - _scaled_output.allocator()->allocate(); + if (weights->dimension(idx_w) != deconv_info.stride().first || + weights->dimension(idx_h) != deconv_info.stride().second || invalid_right != 0 || + invalid_bottom != 0) + { + return DeconvolutionMethod::DIRECT; + } + + return DeconvolutionMethod::GEMM; } void CLTransposeConvLayer::run() { prepare(); - - _memory_group.acquire(); - - _scale_f.run(); - _conv_f.run(); - - _memory_group.release(); + _function->run(); } -void CLTransposeConvLayer::prepare() -{ - if (!_is_prepared) - { - ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - - // Run weights flipping and mark original weights tensor as unused - _weights_flipped.allocator()->allocate(); - _weights_flipped.map(true); - _original_weights->map(CLScheduler::get().queue(), true); - CPPScheduler::get().schedule(&_flip_weights, Window::DimZ); - _weights_flipped.unmap(); - _original_weights->unmap(CLScheduler::get().queue()); - _original_weights->mark_as_unused(); - - // Prepare convolution - _conv_f.prepare(); - - if (!_weights_flipped.is_used()) - { - _weights_flipped.allocator()->free(); - } - - _is_prepared = true; - } -} +void CLTransposeConvLayer::prepare() { _function->prepare(); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp deleted file mode 100644 index 07feb5a64..000000000 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h" - -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/core/CL/ICLTensor.h" - -#include <cmath> -#include <memory> -#include <tuple> - -using namespace arm_compute; - -CLTransposeConvLayerUpsample::CLTransposeConvLayerUpsample() // NOLINT - : _upsample(), - _output(nullptr) -{ -} - -Status CLTransposeConvLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output, - const BorderSize &inner_border, - const PadStrideInfo &info) -{ - return CLTransposeConvLayerUpsampleKernel::validate(input, output, inner_border, info); -} - -void CLTransposeConvLayerUpsample::configure(ICLTensor *input, ICLTensor *output, - const BorderSize &inner_border, - const PadStrideInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - _output = output; - _upsample.configure(input, _output, inner_border, info); -} - -void CLTransposeConvLayerUpsample::run() -{ - _output->map(CLScheduler::get().queue(), true); - if (is_data_type_quantized_asymmetric(_output->info()->data_type())) - { - const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset; - std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero); - } - else - { - memset(_output->buffer(), 0, _output->info()->total_size()); - } - _output->unmap(CLScheduler::get().queue()); - - CLScheduler::get().enqueue(_upsample, false); -} diff --git a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp index 114e1a72d..768c15b41 100644 --- a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp @@ -41,14 +41,14 @@ #include "arm_compute/runtime/CPP/functions/CPPOneHotEx.h" #include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" using namespace arm_compute; void CPPOneHotEx::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, const ITensor *off_value, ITensor *output, const int axis) { - auto k = arm_compute::support::cpp14::make_unique<CPPOneHotKernelEx>(); + auto k = support::cpp14::make_unique<CPPOneHotKernelEx>(); k->configure(indices, depth, on_value, off_value, output, axis); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp deleted file mode 100644 index 6c90ef3b4..000000000 --- a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h" - -#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -void CPPUpsampleEx::configure(const ITensor *input, ITensor *output, const PadStrideInfo &info) -{ - auto k = arm_compute::support::cpp14::make_unique<CPPUpsampleKernelEx>(); - k->configure(input, output, info); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp index ff81ff854..2752eb6aa 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp @@ -42,7 +42,7 @@ #include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h" #include "arm_compute/runtime/IRuntimeContext.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" namespace arm_compute { @@ -53,7 +53,7 @@ NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT void NEActivationLayerEx::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info) { - auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernelEx>(); + auto k = support::cpp14::make_unique<NEActivationLayerKernelEx>(); k->configure(input, output, activation_info); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp index e42c453cf..2fc94b267 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp @@ -42,7 +42,7 @@ #include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h> #include "arm_compute/core/ITensor.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" #include <utility> @@ -53,7 +53,7 @@ template <BinaryLogicalOperation COP> void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2, ITensor *output) { - auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); + auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); k->configure(COP, input1, input2, output); _kernel = std::move(k); } @@ -69,7 +69,7 @@ Status NEBinaryLogicalOperationStatic<COP>::validate(const ITensorInfo *input1, void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output, BinaryLogicalOperation op) { - auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); + auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>(); k->configure(op, input1, input2, output); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp deleted file mode 100644 index dc5c62061..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NECast.h" - -#include "arm_compute/core/NEON/kernels/NECastKernel.h" -#include "support/ToolchainSupport.h" - -namespace arm_compute -{ -void NECast::configure(const ITensor *input, ITensor *output, SubDataType input_subtype) -{ - auto k = arm_compute::support::cpp14::make_unique<NECastKernel>(); - k->configure(input, output, input_subtype); - _kernel = std::move(k); -} - -Status NECast::validate(const ITensorInfo *input, const ITensorInfo *output, - SubDataType input_subtype) -{ - return NECastKernel::validate(input, output, input_subtype); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp deleted file mode 100644 index 5ec0b8677..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -void NEDepthToSpaceLayerEx::configure(const ITensor *input, ITensor *output, int32_t block_shape) -{ - auto k = arm_compute::support::cpp14::make_unique<NEDepthToSpaceLayerKernelEx>(); - k->configure(input, output, block_shape); - _kernel = std::move(k); -} - -Status NEDepthToSpaceLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - int32_t block_shape) -{ - return NEDepthToSpaceLayerKernelEx::validate(input, output, block_shape); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp index 53fb15081..e0ab3e025 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp @@ -41,13 +41,13 @@ #include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h" #include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" using namespace arm_compute; void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups) { - auto k = arm_compute::support::cpp14::make_unique<NEEmbeddingLookupKernel>(); + auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>(); k->configure(input, output, lookups); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp index f45773251..a123439d9 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp @@ -58,7 +58,7 @@ namespace Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) { ARM_COMPUTE_RETURN_ON_ERROR( - NEGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output)); + NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); return Status{}; } @@ -66,7 +66,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output) { - auto k = arm_compute::support::cpp14::make_unique<NETransposeKernel>(); + auto k = support::cpp14::make_unique<NETransposeKernel>(); k->configure(input, output); _kernel = std::move(k); } @@ -158,7 +158,8 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor // Quantize input _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); _scale_factor.allocator()->init( TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32)); _quant_input_kernel.configure(input, &_quantized_input, &_scale_factor); @@ -186,7 +187,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2); @@ -224,8 +225,9 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); // Validate quantization kernel - const ITensorInfo &quantized_input = TensorInfo( - input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + const ITensorInfo &quantized_input = + TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32); ARM_COMPUTE_RETURN_ON_ERROR( NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor)); diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp index fcac3c7ae..dc6c78478 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp @@ -56,12 +56,17 @@ void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS); bool is_hybrid = input->info()->data_type() == DataType::F32 && - weights->info()->data_type() == DataType::S8; + (weights->info()->data_type() == DataType::S8 || + weights->info()->data_type() == DataType::QASYMM8_SIGNED); if (is_hybrid) { auto fc = new arm_compute::NEFullyConnectedHybridLayer{_memory_manager}; + ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info()); + const auto orgin_weights_data_type = weights_info->data_type(); + weights_info->set_data_type(DataType::QASYMM8_SIGNED); fc->configure(input_to_use, _weights, _biases, _output); + weights_info->set_data_type(orgin_weights_data_type); return std::unique_ptr<arm_compute::IFunction>(fc); } else diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp deleted file mode 100644 index 1290cfd39..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -NEGEMMLowpMatrixMultiplyCoreEx::NEGEMMLowpMatrixMultiplyCoreEx( - std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr), - _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _mtx_a_reduction_kernel(), - _mtx_b_reduction_kernel(), _offset_contribution_kernel(), - _offset_contribution_output_stage_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), - _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), - _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false), - _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), - _fuse_output_stage(false), _flip_signedness(false) -{ -} - -void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *b, const ITensor *c, - ITensor *output, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - ARM_COMPUTE_UNUSED(c); - ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCoreEx::validate( - a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info)); - - const ITensor *matrix_a = a; - const ITensor *matrix_b = b; - GEMMInfo info = gemm_info; - - // Clear state - _mtx_a_reshape_kernel = nullptr; - _mtx_b_reshape_kernel = nullptr; - - // Set internal variables - _a_offset = a->info()->quantization_info().uniform().offset; - _b_offset = b->info()->quantization_info().uniform().offset; - _run_vector_matrix_multiplication = a->info()->dimension(1) < 2; - _reshape_b_only_on_first_run = info.reshape_b_only_on_first_run(); - _is_prepared = false; - _fused_assembly_path = false; - _original_b = b; - - const ITensor *a_to_use = a; - - // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage - if (info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE) - { - _fuse_output_stage = true; - _memory_group.manage(&_mm_result_s32); - TensorInfo info_mm_result_s32(output->info()->tensor_shape(), 1, DataType::S32); - _mm_result_s32.allocator()->init(info_mm_result_s32); - } - -#ifdef __aarch64__ - switch (a->info()->data_type()) - { - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::U8: - case DataType::S8: - { - if (a_to_use->info()->data_type() == DataType::QASYMM8 && - info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) - { - _asm_glue.configure(a_to_use, b, c, output, gemm_info); - _fused_assembly_path = _asm_glue.is_configured(); - } - else - { - _asm_glue.configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, - gemm_info); - } - _assembly_path = _asm_glue.is_configured(); - break; - } - default: - { - ARM_COMPUTE_ERROR("Datatype not supported"); - break; - } - } -#endif /* __aarch64__ */ - if (!(_assembly_path || _run_vector_matrix_multiplication)) - { - matrix_a = &_tmp_a; - matrix_b = &_tmp_b; - - // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / - // 4.0f) ] - TensorInfo a_info(compute_interleaved_shape(*a_to_use->info()), 1, - a_to_use->info()->data_type(), a_to_use->info()->quantization_info()); - // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / - // 16.0f) ] - TensorInfo b_info(compute_transpose1xW_shape(*b->info()), 1, b->info()->data_type(), - b->info()->quantization_info()); - _tmp_a.allocator()->init(a_info); - _tmp_b.allocator()->init(b_info); - _memory_group.manage(&_tmp_a); - if (!_reshape_b_only_on_first_run) - { - _memory_group.manage(&_tmp_b); - } - - // Configure interleave kernel - { - auto k = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>(); - k->configure(a_to_use, &_tmp_a); - _mtx_a_reshape_kernel = std::move(k); - } - - // Configure transpose kernel - { - auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>(); - k->configure(b, &_tmp_b); - _mtx_b_reshape_kernel = std::move(k); - } - } - - if (!_fused_assembly_path) - { - // Initialize matrix B reduction kernel only if _a_offset is not equal to 0 - if (_a_offset != 0) - { - TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32); - - _vector_sum_col.allocator()->init(info_vector_sum_col); - if (!_reshape_b_only_on_first_run) - { - _memory_group.manage(&_vector_sum_col); - } - - // Configure Matrix B reduction kernel - _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a_to_use->info()->dimension(0), false); - } - - // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0 - if (_b_offset != 0) - { - TensorInfo info_vector_sum_row(compute_reductionB_shape(*a_to_use->info()), 1, DataType::S32); - - _vector_sum_row.allocator()->init(info_vector_sum_row); - _memory_group.manage(&_vector_sum_row); - - // Configure matrix A reduction kernel - _mtx_a_reduction_kernel.configure(a_to_use, &_vector_sum_row, a_to_use->info()->dimension(0), - false); - } - - if (_fuse_output_stage) - { - // Configure matrix multiply kernel - if (!_assembly_path) - { - auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>(); - k->configure(matrix_a, matrix_b, &_mm_result_s32); - _mm_kernel = std::move(k); - } - - _offset_contribution_output_stage_kernel.configure( - &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, - _b_offset == 0 ? nullptr : &_vector_sum_row, c, - _flip_signedness ? &_signed_output : output, a->info()->dimension(0), _a_offset, - _b_offset, info.gemmlowp_output_stage()); - } - else - { - // Configure matrix multiply kernel - if (!_assembly_path) - { - auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>(); - k->configure(matrix_a, matrix_b, output); - _mm_kernel = std::move(k); - } - // Configure offset contribution kernel - _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, - _b_offset == 0 ? nullptr : &_vector_sum_row, - a_to_use->info()->dimension(0), _a_offset, _b_offset); - } - } - - // Allocate tensors - if (!_assembly_path && !_run_vector_matrix_multiplication) - { - _tmp_a.allocator()->allocate(); - if (!_reshape_b_only_on_first_run) - { - _tmp_b.allocator()->allocate(); - } - } - - if (!_fused_assembly_path) - { - if (_a_offset != 0 && !_reshape_b_only_on_first_run) - { - _vector_sum_col.allocator()->allocate(); - } - - if (_b_offset != 0) - { - _vector_sum_row.allocator()->allocate(); - } - } - - if (_fuse_output_stage) - { - _mm_result_s32.allocator()->allocate(); - } -} - -Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b, - const ITensorInfo *c, const ITensorInfo *output, - const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::S8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - c != nullptr && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE, - "Bias addition not supported in NEGEMMLowpMatrixMultiplyCoreEx for output S32"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(0) != (b)->dimension(1), - "The product AB is defined only if the number of columns in A is " - "equal to the number of rows in B"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), - "Matrix A already reshaped is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), - "Matrix B already reshaped is not supported"); - - GEMMInfo info = gemm_info; - const ITensorInfo *matrix_a_info = a; - const ITensorInfo *matrix_b_info = b; - - const ITensorInfo *a_to_use = a; - - TensorInfo tmp_a_info{}; - TensorInfo tmp_b_info{}; - TensorInfo mm_result_s32_info{}; - - int32_t a_offset = a->quantization_info().uniform().offset; - int32_t b_offset = b->quantization_info().uniform().offset; - - bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE; - if (fuse_output_stage) - { - auto_init_if_empty( - mm_result_s32_info, - a->clone()->set_tensor_shape(output->tensor_shape()).set_data_type(DataType::S32)); - } - - // Check if we need to run the optimized assembly kernel - bool run_optimised = false; - bool run_optimised_requantized = false; - if (a_to_use->data_type() == DataType::QASYMM8 && - info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) - { - run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info)); - run_optimised_requantized = run_optimised; - } - else - { - run_optimised = bool(NEGEMMAssemblyDispatch::validate( - a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info)); - } - - if (run_optimised) - { - ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0)); - if (info.depth_output_gemm3d() != 0) - { - if (info.reinterpret_input_as_3d()) - { - ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1)); - ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(2) != output->dimension(2)); - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1) * output->dimension(2)); - } - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1)); - } - } - else - { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.reinterpret_input_as_3d(), - "NEGEMM cannot reinterpret the input tensor as 3D"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.depth_output_gemm3d() != 0, - "NEGEMM cannot reinterpret the output tensor as 3D"); - - const bool run_vector_matrix_multiplication = a->dimension(1) < 2; - if (!run_vector_matrix_multiplication) - { - matrix_a_info = &tmp_a_info; - matrix_b_info = &tmp_b_info; - - // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / - // 4.0f) ] - TensorShape shape_tmp_a = a->tensor_shape(); - shape_tmp_a.set(0, a->dimension(0) * 4); - shape_tmp_a.set(1, std::ceil(a->dimension(1) / 4.f)); - - // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width - // / 16.0f) ] - TensorShape shape_tmp_b = b->tensor_shape(); - shape_tmp_b.set(0, b->dimension(1) * 16); - shape_tmp_b.set(1, std::ceil(b->dimension(0) / 16.f)); - - // Validate interleave kernel - auto_init_if_empty(tmp_a_info, a_to_use->clone()->set_tensor_shape(shape_tmp_a)); - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(shape_tmp_b)); - - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMInterleave4x4Kernel::validate(a_to_use, &tmp_a_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(b, &tmp_b_info)); - } - } - - if (!run_optimised_requantized) - { - TensorInfo info_vector_sum_col{}; - TensorInfo info_vector_sum_row{}; - - // Validate matrix B reduction kernel only if _a_offset is not equal to 0 - if (a_offset != 0) - { - info_vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32); - - // Configure Matrix B reduction kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixBReductionKernel::validate( - b, &info_vector_sum_col, a->dimension(0), false)); - } - - // Validate Matrix A reduction kernel only if _b_offset is not equal to 0 - if (b_offset != 0) - { - info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32); - - // Configure matrix A reduction kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate( - a_to_use, &info_vector_sum_row, a->dimension(0), false)); - } - - if (fuse_output_stage) - { - if (!run_optimised) - { - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyKernel::validate( - matrix_a_info, matrix_b_info, &mm_result_s32_info)); - } - - // Validate offset contribution kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionOutputStageKernel::validate( - &mm_result_s32_info, a_offset == 0 ? nullptr : &info_vector_sum_col, - b_offset == 0 ? nullptr : &info_vector_sum_row, c, output, a_offset, b_offset, - info.gemmlowp_output_stage())); - } - else - { - if (!run_optimised) - { - ARM_COMPUTE_RETURN_ON_ERROR( - NEGEMMLowpMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, output)); - } - // Validate offset contribution kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionKernel::validate( - output, a_offset == 0 ? nullptr : &info_vector_sum_col, - b_offset == 0 ? nullptr : &info_vector_sum_row, a_offset, b_offset)); - } - } - return Status{}; -} - -void NEGEMMLowpMatrixMultiplyCoreEx::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Reshape inputs - if (_mtx_a_reshape_kernel) - { - NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY); - } - if (_mtx_b_reshape_kernel && !_reshape_b_only_on_first_run) - { - NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); - } - - // Run GEMM - if (_asm_glue.is_configured()) - { - _asm_glue.run(); - } - else - { - NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY); - } - - if (!_fused_assembly_path) - { - // Run matrix A reduction kernel only if _b_offset is not equal to 0 - if (_b_offset != 0) - { - NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX); - } - - // Run matrix B reduction kernel only if _a_offset is not equal to 0 - if (_a_offset != 0 && !_reshape_b_only_on_first_run) - { - NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); - } - - if (_fuse_output_stage) - { - // Run offset contribution kernel - NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY); - } - else - { - // Run offset contribution kernel - NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY); - } - } -} - -void NEGEMMLowpMatrixMultiplyCoreEx::prepare() -{ - if (!_is_prepared) - { - // Run assembly reshape - if (_asm_glue.is_configured() && _reshape_b_only_on_first_run) - { - ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); - - _asm_glue.prepare(); - _original_b->mark_as_unused(); - } - // Run non-assembly reshape - else if (_mtx_b_reshape_kernel && _reshape_b_only_on_first_run) - { - ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); - - // Run reshape kernel and mark original weights tensor as unused - _tmp_b.allocator()->allocate(); - NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY); - _original_b->mark_as_unused(); - } - - // Run matrix B reduction kernel only if _a_offset is not equal to 0 - if (_a_offset != 0 && _reshape_b_only_on_first_run) - { - _vector_sum_col.allocator()->allocate(); - NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX); - } - - _is_prepared = true; - } -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp index c8bb88aea..433c35d58 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp @@ -41,7 +41,7 @@ #include "arm_compute/runtime/NEON/functions/NEGatherEx.h" #include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" #include <utility> @@ -49,7 +49,7 @@ namespace arm_compute { void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis) { - auto k = arm_compute::support::cpp14::make_unique<NEGatherKernelEx>(); + auto k = support::cpp14::make_unique<NEGatherKernelEx>(); k->configure(input, indices, output, axis); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp index 078019f4e..52d58accf 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp @@ -41,14 +41,14 @@ #include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h" #include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h" -#include "support/ToolchainSupport.h" +#include "support/MemorySupport.h" using namespace arm_compute; void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, ITensor *output, ITensor *hits) { - auto k = arm_compute::support::cpp14::make_unique<NEHashtableLookupKernel>(); + auto k = support::cpp14::make_unique<NEHashtableLookupKernel>(); k->configure(lookups, keys, input, output, hits); _kernel = std::move(k); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp deleted file mode 100644 index dac3b849d..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEPReLU.h" - -#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h" -#include "support/ToolchainSupport.h" - -#include <utility> - -using namespace arm_compute; - -void NEPReLU::configure(const ITensor *input, const ITensor *alpha, ITensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<NEPReLUKernel>(); - k->configure(input, alpha, output); - _kernel = std::move(k); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp deleted file mode 100644 index 0e9a5e969..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NERNNLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ -NERNNLayerEx::NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), - _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), - _gemm_output(), _add_output(), _is_prepared(false) -{ -} - -Status NERNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *recurrent_weights, const ITensorInfo *bias, - const ITensorInfo *hidden_state, const ITensorInfo *output, - const ActivationLayerInfo &info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, - output); - - const int idx_width = 0; - const int idx_height = 1; - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) != - recurrent_weights->dimension(idx_width)); - ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) != - recurrent_weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1); - ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), - hidden_state->tensor_shape()); - - auto shape_info = TensorInfo(misc::shape_calculator::compute_rnn_shape( - recurrent_weights, hidden_state->dimension(idx_height)), - 1, input->data_type()); - - ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, weights, bias, &shape_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate( - &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); - ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&shape_info, &shape_info, info)); - - return Status{}; -} - -void NERNNLayerEx::configure(const ITensor *input, const ITensor *weights, - const ITensor *recurrent_weights, const ITensor *bias, - ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output); - ARM_COMPUTE_ERROR_THROW_ON(NERNNLayerEx::validate(input->info(), weights->info(), - recurrent_weights->info(), bias->info(), - hidden_state->info(), output->info(), info)); - - const int idx_height = 1; - TensorShape shape = misc::shape_calculator::compute_rnn_shape( - recurrent_weights->info(), hidden_state->info()->dimension(idx_height)); - - _is_prepared = false; - - // Manage intermediate buffers and configure - _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - - // Manage intermediate buffers and configure - _memory_group.manage(&_fully_connected_out); - _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out); - - _memory_group.manage(&_gemm_output); - _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f); - - _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); - _memory_group.manage(&_add_output); - - _add_kernel.configure(&_fully_connected_out, &_gemm_output, &_add_output, - ConvertPolicy::SATURATE); - - _fully_connected_out.allocator()->allocate(); - _gemm_output.allocator()->allocate(); - - _activation_kernel.configure(&_add_output, hidden_state, info); - _add_output.allocator()->allocate(); - - _copy_kernel.configure(hidden_state, output); -} - -void NERNNLayerEx::run() -{ - prepare(); - - MemoryGroupResourceScope scope_mg(_memory_group); - - _fully_connected_kernel.run(); - - _gemm_state_f.run(); - - NEScheduler::get().schedule(&_add_kernel, Window::DimY); - NEScheduler::get().schedule(&_activation_kernel, Window::DimY); - - // copy hidden out to output - NEScheduler::get().schedule(&_copy_kernel, Window::DimY); -} - -void NERNNLayerEx::prepare() -{ - if (!_is_prepared) - { - _fully_connected_kernel.prepare(); - _gemm_state_f.prepare(); - - _is_prepared = true; - } -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp deleted file mode 100644 index 116bba3c0..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEReduceMeanEx.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute; - -NEReduceMeanEx::NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), - _reduction_ops(), _keep_dims() -{ -} - -Status NEReduceMeanEx::validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output) -{ - ARM_COMPUTE_UNUSED(keep_dims); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions()); - - TensorShape out_shape = input->tensor_shape(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - const int input_dims = input->num_dimensions(); - Coordinates axis_local = reduction_axis; - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for (unsigned int i = 0; i < reduction_ops; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3); - ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local[i]) > - input->num_dimensions() - 1); - if (output->total_size() > 0 && keep_dims) - { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1); - } - if (keep_dims) - { - out_shape.set(axis_local[i], 1); - } - else - { - out_shape.remove_dimension(axis_local[i] - i); - } - } - const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info); - - return Status{}; -} - -void NEReduceMeanEx::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - _reduction_ops = reduction_axis.num_dimensions(); - _reduction_kernels = - arm_compute::support::cpp14::make_unique<NEReductionOperation[]>(_reduction_ops); - _reduced_outs = - arm_compute::support::cpp14::make_unique<Tensor[]>(_reduction_ops - (keep_dims ? 1 : 0)); - _keep_dims = keep_dims; - - Coordinates axis_local = reduction_axis; - const int input_dims = input->info()->num_dimensions(); - const unsigned int reduction_ops = reduction_axis.num_dimensions(); - - // Convert negative axis - for (unsigned int i = 0; i < reduction_ops; ++i) - { - axis_local[i] = wrap_around(axis_local[i], input_dims); - } - - // Perform reduction for every axis - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - TensorShape out_shape = i == 0 ? input->info()->tensor_shape() - : (_reduced_outs.get() + i - 1)->info()->tensor_shape(); - out_shape.set(axis_local[i], 1); - auto in = (i == 0) ? input : (_reduced_outs.get() + i - 1); - - if (i == _reduction_ops - 1 && keep_dims) - { - _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::MEAN_SUM); - } - else - { - _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), - input->info()->data_type(), - input->info()->quantization_info()) - .set_data_layout(output->info()->data_layout())); - _memory_group.manage(_reduced_outs.get() + i); - _reduction_kernels[i].configure(in, _reduced_outs.get() + i, axis_local[i], - ReductionOperation::MEAN_SUM); - } - } - - // Allocate intermediate tensors - for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 1 : 0); ++i) - { - _reduced_outs[i].allocator()->allocate(); - } - - // Configure reshape layer if we want to drop the dimensions - if (!keep_dims) - { - TensorShape out_shape = input->info()->tensor_shape(); - - // We have to sort the reduction axis vectors in order for remove_dimension - // to work properly - std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops); - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - out_shape.remove_dimension(axis_local[i] - i); - } - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape)); - _reshape.configure(_reduced_outs.get() + _reduction_ops - 1, output); - } -} - -void NEReduceMeanEx::run() -{ - _memory_group.acquire(); - - for (unsigned int i = 0; i < _reduction_ops; ++i) - { - _reduction_kernels[i].run(); - } - - if (!_keep_dims) - { - _reshape.run(); - } - _memory_group.release(); -} diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp deleted file mode 100644 index 198bb7672..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ -NESpaceToBatchLayerEx::NESpaceToBatchLayerEx() - : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false) -{ -} - -void NESpaceToBatchLayerEx::configure(const ITensor *input, const ITensor *block_shape, - const ITensor *paddings, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output); - - if (input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) - { - _has_padding = true; - _memset_kernel.configure( - output, PixelValue(0, output->info()->data_type(), output->info()->quantization_info())); - } - _space_to_batch_kernel.configure(input, block_shape, paddings, output); -} - -void NESpaceToBatchLayerEx::configure(const ITensor *input, const int block_shape_x, - const int block_shape_y, const Size2D &padding_left, - const Size2D &padding_right, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - if (input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size()) - { - _has_padding = true; - _memset_kernel.configure( - output, PixelValue(0, output->info()->data_type(), output->info()->quantization_info())); - } - _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right, - output); -} - -Status NESpaceToBatchLayerEx::validate(const ITensorInfo *input, const ITensorInfo *block_shape, - const ITensorInfo *paddings, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR( - NESpaceToBatchLayerKernel::validate(input, block_shape, paddings, output)); - - return Status{}; -} - -Status NESpaceToBatchLayerEx::validate(const ITensorInfo *input, const int block_shape_x, - const int block_shape_y, const Size2D &padding_left, - const Size2D &padding_right, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(NESpaceToBatchLayerKernel::validate( - input, block_shape_x, block_shape_y, padding_left, padding_right, output)); - - return Status{}; -} - -void NESpaceToBatchLayerEx::run() -{ - // Zero out output only if we have paddings - if (_has_padding) - { - NEScheduler::get().schedule(&_memset_kernel, Window::DimY); - } - NEScheduler::get().schedule(&_space_to_batch_kernel, Window::DimY); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp deleted file mode 100644 index 97697e3ea..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -void NESpaceToDepthLayerEx::configure(const ITensor *input, ITensor *output, int32_t block_shape) -{ - auto k = arm_compute::support::cpp14::make_unique<NESpaceToDepthLayerKernelEx>(); - k->configure(input, output, block_shape); - _kernel = std::move(k); -} - -Status NESpaceToDepthLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - int32_t block_shape) -{ - ARM_COMPUTE_RETURN_ON_ERROR(NESpaceToDepthLayerKernelEx::validate(input, output, block_shape)); - return Status{}; -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp index df0689273..09f178005 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp @@ -1,21 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,14 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/UtilsEx.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -52,20 +33,15 @@ using namespace arm_compute::misc::shape_calculator; namespace arm_compute { + NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), _conv_f(), _upsample_f(), _flip_weights(), - _permute_input(), - _permute_weights(), - _permute_output(), _scaled_output(), _weights_flipped(), - _permuted_input(), - _permuted_weights(), - _permuted_output(), - _is_nchw(false), + _flip_axis(), _original_weights(nullptr), _input(nullptr), _info(), @@ -80,7 +56,7 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, - DataType::QASYMM8); + DataType::QASYMM8, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); const unsigned int width_idx = @@ -95,13 +71,16 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf weights->dimension(height_idx), info, invalid_right, invalid_bottom); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - if (is_data_type_quantized_asymmetric(input->data_type()) && bias) + if (bias != nullptr) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); - } - else if (bias) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + if (is_data_type_quantized_asymmetric(input->data_type())) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); + } } if (output->tensor_shape().total_size() > 0) @@ -110,12 +89,12 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) < output_shape.x(), - "Output's dim 0 is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) < output_shape.y(), - "Output's dim 1 is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) < output_shape.z(), - "Output's dim 2 is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), + "Output's width is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), + "Output's height is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), + "Output's depth is invalid."); } unsigned int pad_left = 0; @@ -127,7 +106,6 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf pad_bottom); TensorInfo scale_out_info( input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); - scale_out_info.set_data_layout(input->data_layout()); const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); const unsigned int batches_idx = @@ -149,19 +127,13 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con ITensor *output, const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom) { + // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate( + input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), + info, invalid_right, invalid_bottom)); const DataLayout data_layout = input->info()->data_layout(); - - _input = input; - _original_weights = weights; - _info = info; - _is_prepared = false; - _is_nchw = data_layout == DataLayout::NCHW; - - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const unsigned int height_idx = @@ -173,101 +145,54 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); + + _input = input; + _original_weights = weights; + _info = info; + _is_prepared = false; + + unsigned int pad_left = 0; + unsigned int pad_right = 0; + unsigned int pad_top = 0; + unsigned int pad_bottom = 0; + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; + // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate( - input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); - + _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); _memory_group.manage(&_scaled_output); - if (!_is_nchw) - { - _memory_group.manage(&_permuted_input); - _memory_group.manage(&_permuted_weights); - _memory_group.manage(&_permuted_output); - - // Configure the function to transform the input tensor from NHWC -> NCHW - _permuted_input.info()->set_quantization_info(input->info()->quantization_info()); - _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U)); - _permuted_input.info()->set_data_layout(DataLayout::NCHW); - - // Configure the function to transform the weights tensor from NHWC -> NCHW - _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info()); - _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U)); - _permuted_weights.info()->set_data_layout(DataLayout::NCHW); - - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in - // order to match output shape - - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *_permuted_input.info(), *_permuted_weights.info(), info, out_dims, invalid_right, - invalid_bottom, pad_left, pad_right, pad_top, pad_bottom); - - TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(), - _permuted_input.info()->quantization_info()); - scale_out_info.set_data_layout(DataLayout::NCHW); - _scaled_output.allocator()->init(scale_out_info); - - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::CEIL); - _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info); - - _weights_flipped.allocator()->init(*_permuted_weights.info()->clone()); - _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info()); - _flip_weights.configure(&_permuted_weights, &_weights_flipped); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - const auto out_shape = output->info()->tensor_shape(); - TensorShape permuted_out_shape{out_shape[1], out_shape[2], out_shape[0], out_shape[3]}; - TensorInfo permuted_out_info(permuted_out_shape, 1, output->info()->data_type(), - output->info()->quantization_info()); - _permuted_output.allocator()->init(permuted_out_info); - _permuted_output.info()->set_data_layout(DataLayout::NCHW); - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info); - - // Configure the function to transform the convoluted output to NHWC - _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U)); - - _permuted_input.allocator()->allocate(); - _permuted_weights.allocator()->allocate(); - _permuted_output.allocator()->allocate(); - } - else - { - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in - // order to match output shape - unsigned int pad_left = 0; - unsigned int pad_right = 0; - unsigned int pad_top = 0; - unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), - input->info()->quantization_info()); - _scaled_output.allocator()->init(scale_out_info); - const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, - DimensionRoundingType::FLOOR); - _upsample_f.configure(input, &_scaled_output, upsample_info); - - _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(weights, &_weights_flipped); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); - } + _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); + _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); + + // setup the function to convolve the upscaled output + const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); + + const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( + *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, + pad_right, pad_top, pad_bottom); + + const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, + DimensionRoundingType::FLOOR); + + TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), + input->info()->quantization_info()); + scale_out_info.set_data_layout(data_layout); + _scaled_output.allocator()->init(scale_out_info); + + _upsample_f.configure(input, &_scaled_output, upsample_info); + + _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); + + // Setup flip axis data + _flip_axis.allocator()->allocate(); + auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer()); + axis_data[0] = static_cast<uint32_t>(width_idx); + axis_data[1] = static_cast<uint32_t>(height_idx); + _scaled_output.allocator()->allocate(); } @@ -275,22 +200,10 @@ void NETransposeConvLayer::run() { prepare(); - // MemoryGroupResourceScope scope_mg(_memory_group); - - // Permute input - if (!_is_nchw) - { - _permute_input.run(); - } + MemoryGroupResourceScope scope_mg(_memory_group); _upsample_f.run(); _conv_f.run(); - - // Permute output - if (!_is_nchw) - { - _permute_output.run(); - } } void NETransposeConvLayer::prepare() @@ -301,22 +214,12 @@ void NETransposeConvLayer::prepare() // Run weights flipping and mark original weights tensor as unused _weights_flipped.allocator()->allocate(); - // Permute weights - if (!_is_nchw) - { - _permute_weights.run(); - } - NEScheduler::get().schedule(&_flip_weights, Window::DimZ); + _flip_weights.run(); _original_weights->mark_as_unused(); // Prepare convolution _conv_f.prepare(); - if (!_weights_flipped.is_used()) - { - _weights_flipped.allocator()->free(); - } - _is_prepared = true; } } |