diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-07-30 11:40:16 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-07-30 11:40:16 +0900 |
commit | 9e45ab56bd165609118989c0d1bec309c3754560 (patch) | |
tree | 4979e8674abc7d21a6471770c1355e0e6c0e8a3f /compute/ARMComputeEx/arm_compute/runtime | |
parent | 05e0ec30a632339a8533082476f27bda31ccde16 (diff) | |
download | nnfw-9e45ab56bd165609118989c0d1bec309c3754560.tar.gz nnfw-9e45ab56bd165609118989c0d1bec309c3754560.tar.bz2 nnfw-9e45ab56bd165609118989c0d1bec309c3754560.zip |
patch for rebase master on release/1.7.0 submit/tizen/20200731.060745 submit/tizen/20200730.075407 submit/tizen/20200730.023729 accepted/tizen/unified/20200803.122223 tizen_6_m1_backup
Change-Id: Id38b617d325ef7e854995a47f032bdf482a779b3
Diffstat (limited to 'compute/ARMComputeEx/arm_compute/runtime')
29 files changed, 313 insertions, 2210 deletions
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h index 97bc4cea5..cfbd13436 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h @@ -16,25 +16,14 @@ #ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__ #define __ARM_COMPUTE_CLFUNCTIONSEX_H__ -#include <arm_compute/runtime/CL/functions/CLArgOperation.h> -#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h> #include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h> -#include <arm_compute/runtime/CL/functions/CLCast.h> -#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h> #include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h> #include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h> #include <arm_compute/runtime/CL/functions/CLGatherEx.h> #include <arm_compute/runtime/CL/functions/CLHashtableLookup.h> #include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h> -#include <arm_compute/runtime/CL/functions/CLLogicalNot.h> #include <arm_compute/runtime/CL/functions/CLNeg.h> -#include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h> -#include <arm_compute/runtime/CL/functions/CLPReLU.h> #include <arm_compute/runtime/CL/functions/CLReduceOperation.h> -#include <arm_compute/runtime/CL/functions/CLRNNLayerEx.h> -#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h> -#include <arm_compute/runtime/CL/functions/CLSplit.h> -#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h> #include <arm_compute/runtime/CL/functions/CLTopKV2.h> #include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h> diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h deleted file mode 100644 index c37096f7c..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h +++ 
/dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/** - * @file CLArgOperation.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLArgOperation class - */ - -#ifndef __ARM_COMPUTE_CLARGOPERATION_H__ -#define __ARM_COMPUTE_CLARGOPERATION_H__ - -#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to execute CLArgOperation operation - */ -class CLArgOperation : public IFunction -{ -public: - /** - * @brief Construct a new CLArgOperation object - */ - CLArgOperation(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLArgOperation(const CLArgOperation &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - */ - CLArgOperation &operator=(const CLArgOperation &) = delete; - - /** - * @brief Construct a new CLArgOperation object by using copy constructor - * @param[in] CLArgOperation object to move - */ - CLArgOperation(CLArgOperation &&) = default; - - /** - * @brief Assign a CLArgOperation object. - * @param[in] CLArgOperation object to assign. This object will be moved. - */ - CLArgOperation &operator=(CLArgOperation &&) = default; - - /** - * @brief Initialise the kernel's inputs and outputs. - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32. - * @param[out] output The result of arg operation. Data types supported: S32. - * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[in] op Arg operation to perform. - * @return N/A - */ - void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, ArgOperation op); - - /** - * @brief Static function to check if given info will lead to a valid configuration - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32. 
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. - * @param[out] output The result of arg operation. Data types supported: S32. - * @param[in] op Arg operation to perform. - * @return a status - */ - static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &axis, - const ITensorInfo *output, ArgOperation op); - /** - * @brief Run the OpenCL kernel for this operation - * @return N/A - */ - void run() override; - -private: - ICLTensor *_input{nullptr}; - ICLTensor *_output{nullptr}; - std::vector<uint32_t> _axis{}; - ArgOperation _arg_op{ArgOperation::MAX}; - - std::unique_ptr<CLTensor[]> _interm_tensors{nullptr}; - std::unique_ptr<CLArgOperationKernel[]> _argop_kernels{nullptr}; - size_t _num_of_kernels{0}; -}; -} -#endif /*__ARM_COMPUTE_CLARGOPERATION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h deleted file mode 100644 index eed5cb8a4..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ -#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLBatchToSpaceNDKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLBatchToSpaceND : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] block_size A pointer to an array of integer values specifying block sizes - * for spatial dimension. 
- */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size); -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h deleted file mode 100644 index ebe0d8a1c..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLCast.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLCast class - */ - -#ifndef __ARM_COMPUTE_CLCAST_H__ -#define __ARM_COMPUTE_CLCAST_H__ - -#include "arm_compute/core/TypesEx.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLCastKernel. - * This converts the input tensor to the tensor of the output tensor's type. - */ -class CLCast : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's input and output - * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] input_subtype Sub data type of input. - */ - void configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype); -}; -} -#endif /* __ARM_COMPUTE_CLCAST_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h deleted file mode 100644 index d52a538df..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__ -#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLDepthToSpaceKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. - * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLDepthToSpace : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[block_size] block size integer only - */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size); -}; -} // namesace arm_compute - -#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h new file mode 100644 index 000000000..409eaf593 --- /dev/null +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2019-2020 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ +#define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ + +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h" +#include "arm_compute/runtime/CL/functions/CLReverse.h" +#include "arm_compute/runtime/CL/functions/CLTranspose.h" + +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" + +#include <memory> + +namespace arm_compute +{ +class ICLTensor; +/** Function to run the deconvolution layer. + * + * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input + * depending on the stride and pad info and then perform a 1x1 + * convolution pass. 
Input stride defines how many zeroes we should put between each element of the + * input and pad is the amount of padding. + * + * The relation between input to output is as follows: + * \f[ + * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x + * \f] + * \f[ + * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y + * \f] + * + * where: + * width_input is the size of the first input dimension. + * height_input is the size of the second input dimension. + * width_output is the size of the first output dimension. + * height_output is the size of the second output dimension. + * kernel_x and kernel_y are the convolution sizes in x and y. + * stride_x and stride_y is the input stride of the first and second dimension. + * + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. + * Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse. 
+ * + * This function calls the following OpenCL kernels/functions: + * + * -# @ref CLDeconvolutionLayerUpsample + * -# @ref CLConvolutionLayer + * + * And the following CPP kernels: + * -# @ref CLReverse + * + */ +class CLDirectTransposeConvLayer : public IFunction +{ +public: + /** Constructor */ + CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete; + /** Default move constructor */ + CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete; + /** Default move assignment operator */ + CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default; + /** Set the input, weights, biases and output tensors. + * + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an + * optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type + * supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, except for + * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. The output has the same number of dimensions as the + * @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this + * is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * + */ + void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); + /** Set the input, weights, biases and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and + * an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data + * type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, except for + * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[out] output Output tensor. The output has the same number of dimensions as + * the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref + * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref + * CLWeightsReshapeKernel. 
+ * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info, + unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLDirectTransposeConvLayer + * + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an + * optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data + * type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, except for input + * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * @param[in] output Output tensor info. The output has the same number of dimensions as the + * @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, this is + * decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info, + unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + MemoryGroup _memory_group; + CLDeconvolutionLayerUpsample _scale_f; + CLConvolutionLayer _conv_f; + CLReverse _flip_weights; + + CLTensor _scaled_output; + ICLTensor *_original_weights; + CLTensor _weights_flipped; + CLTensor _flip_axis; + + bool _is_prepared; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h index 1a0284a3e..f3266f688 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h @@ -50,7 +50,7 @@ #include "arm_compute/core/CL/kernels/CLTransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h" +#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" namespace arm_compute { @@ -168,7 +168,7 @@ private: CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel; CLScaleFactorSymm8Kernel _scale_factor_kernel; CLQuantizationSymmetricKernel _quant_input_kernel; - CLGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp; + CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; CLMultiplyScaleFactorKernel _multiply_scale_kernel; CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to // add bias in diff --git 
a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h deleted file mode 100644 index 68aba74ab..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ -#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ - -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/MemoryGroup.h" - -namespace arm_compute -{ -class IMemoryManager; -class ICLTensor; - -/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. 
This function calls the - * following OpenCL kernels: - * - * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of - * GEMMInfo is FALSE) - * -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0) - * -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0) - * -*/ -class CLGEMMLowpMatrixMultiplyCoreEx : public IFunction -{ -public: - /** Constructor */ - CLGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyCoreEx(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete; - /** Default move constructor */ - CLGEMMLowpMatrixMultiplyCoreEx(CLGEMMLowpMatrixMultiplyCoreEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyCoreEx &operator=(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete; - /** Default move assignment operator */ - CLGEMMLowpMatrixMultiplyCoreEx &operator=(CLGEMMLowpMatrixMultiplyCoreEx &&) = default; - /** Initialise the kernel's inputs, output - * - * @note GEMMLowp: low precision GEMM kernel. [A * B + C] - * This kernel performs the following computations: - * - * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them. - * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them. - * -# Compute the matrix product of the resulting a * b in int32. - * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE - * - * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8. - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: - * S32 - * @param[out] output Output tensor. 
Data type supported: S32 or QASYMM8 if - * gemm_info.gemmlowp_output_stage != NONE - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped - * and - * if the reshape of matrix B should be executed only for the first run - */ - void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, - const GEMMInfo &gemm_info = GEMMInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLGEMMLowpMatrixMultiplyCoreEx - * - * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8. - * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type - * supported: S32 - * @param[in] output Output tensor info. Data type supported: S32 or QASYMM8 if - * gemm_info.gemmlowp_output_stage != NONE - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped - * and - * if the reshape of matrix B should be executed only for the first run - * - * @return a status - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, - const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo()); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - - // Kernels used - CLGEMMLowpMatrixMultiplyKernelEx _mm_midgard_kernel; - CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - - // Temporary tensors - CLTensor _vector_sum_col; - CLTensor _vector_sum_row; - - int32_t _a_offset; - int32_t _b_offset; - bool _reshape_b_only_on_first_run; - bool _is_prepared; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h 
b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h deleted file mode 100644 index 51216715f..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLLOGICALNOT_H__ -#define __ARM_COMPUTE_CLLOGICALNOT_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLLogicalNot : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input Source tensor. Data types supported: QASYMM8. - * @param[out] output Output tensor. Data types supported: QASYMM8. - */ - void configure(ICLTensor *input, ICLTensor *output); -}; - -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLLOGICALNOT_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h deleted file mode 100644 index 7fbe558ff..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLPRELU_H__ -#define __ARM_COMPUTE_CLPRELU_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -class CLPReLU : public ICLSimpleFunction -{ -public: - /** Initialise the function's source and destination. - * - * @param[in] input. Data types supported: - * QASYMM8/F16/F32. - * @param[in] alpha. Data types supported: - * QASYMM8/F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. 
- */ - void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output); -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLPRELU_H__*/ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h deleted file mode 100644 index e83fb01cd..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLPixelWiseDivision.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLPixelWiseDivision class - */ -#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__ -#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLPixelWiseDivisionKernel. - */ -class CLPixelWiseDivision : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's inputs, output and convertion policy. - * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32 - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be - * modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or - * 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest - * even. 
- * @return N/A - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f, - ConvertPolicy overflow_policy = ConvertPolicy::WRAP, - RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLPixelWiseDivision - * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale = 1.f, - ConvertPolicy overflow_policy = ConvertPolicy::WRAP, - RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO); -}; -} -#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h deleted file mode 100644 index b49cbd873..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_CLRNN_LAYER_EX_H__ -#define __ARM_COMPUTE_CLRNN_LAYER_EX_H__ - -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" -#include "arm_compute/runtime/CL/functions/CLGEMM.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLRNNLayerEx */ -class CLRNNLayerEx : public IFunction -{ -public: - /** Default constructor */ - CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Initialize the function - * - * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data - * types supported: F16/F32 - * @param[in] weights Weights tensor of shape [input_size, num_units] that - * multiplies the input. Data types supported: Same as @p input - * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies - * the current 'state'. Data types supported: Same as @p input - * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same - * as @p input - * @param[out] output Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] info Activation layer parameter. - */ - void configure(const ICLTensor *input, const ICLTensor *weights, - const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state, - ICLTensor *output, ActivationLayerInfo &info); - /** Initialize the function - * - * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data - * types supported: F16/F32 - * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies - * the input. 
Data types supported: Same as @p input - * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the - * current 'state'. Data types supported: Same as @p input - * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p - * input - * @param[in] output Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] info Activation layer parameter. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *recurrent_weights, const ITensorInfo *bias, - const ITensorInfo *hidden_state, const ITensorInfo *output, - const ActivationLayerInfo &info); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - CLGEMM _gemm_state_f; - CLSaturatedArithmeticOperationKernel _add_kernel; - CLActivationLayerKernel _activation_kernel; - CLFullyConnectedLayer _fully_connected_kernel; - CLCopyKernel _copy_kernel; - CLTensor _fully_connected_out; - CLTensor _gemm_output; - CLTensor _add_output; - bool _is_prepared; -}; -} -#endif /* __ARM_COMPUTE_CLRNN_LAYER_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h deleted file mode 100644 index 2090b46fa..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__ -#define __ARM_COMPUTE_CLSPACETODEPTH_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLSpaceToDepthKernel - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. 
- * @note The function converts the input tensor to the tensor of the output tensor's type. - */ -class CLSpaceToDepth : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[block_size] block size integer only - */ - void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size); -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h deleted file mode 100644 index 03edd15e6..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @file CLStridedSlice.h - * @ingroup COM_AI_RUNTIME - * @brief This file contains arm_compute::CLStridedSlice and arm_compute::CLStridedSliceCPU class - */ - -#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ -#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Class to run @ref CLStridedSliceKernel - */ -class CLStridedSliceEx : public ICLSimpleFunction -{ -public: - /** - * @brief Initialise the kernel's inputs and outputs - * @param[in] input Tensor input. Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Output tensor. 
Data type supported: Same as @p input - * @param[in] beginData 'begin' vector of strided slice operation - * @param[in] endData 'end' vector of strided slice operation - * @param[in] stridesData 'strides' vector of strided slice operation - * @param[in] beginMask If the ith bit is set, begin[i] is ignored - * @param[in] endMask If the ith bit is set, end[i] is ignored - * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the - * dimensionality by 1, taking on the value at index begin[i] - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask); -}; -} -#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h index 54a697e69..5fb102e47 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,16 +37,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ - #ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ #define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ -#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h" - -#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" - -#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -54,119 +49,102 @@ namespace arm_compute { -class ICLTensor; -/** Function to run the transpose convolution layer. - * - * @note This layer was copied in order to fix a bug computing to wrong output dimensions. - * - * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input - * depending on the stride and pad info and then perform a 1x1 - * convolution pass. Input stride defines how many zeroes we should put between each element of the - * input, pad is the amount of padding and finally a is a user - * specified value where a < stride - 1, that increases the padding top and right of the input - * image. - * - * The relation between input to output is as follows: - * \f[ - * width\_output = (width\_input - 1) \cdot stride\_x - \cdot padding\_x + kernel\_x - * \f] - * \f[ - * height\_output = (height\_input - 1) \cdot stride\_y - \cdot padding\_y + kernel\_y - * \f] - * - * where: - * width_input is the size of the first input dimension. - * height_input is the size of the second input dimension. - * width_output is the size of the first output dimension. - * height_output is the size of the second output dimension. - * kernel_x and kernel_y are the convolution sizes in x and y. - * stride_x and stride_y is the input stride of the first and second dimension. 
- * - * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. - * Therefore, it will be necessary to use the weights in the - * reverse order to perform an actual convolution. This is achieved by using the @ref - * CPPFlipWeightsKernel. - * - * This function calls the following OpenCL kernels/functions: - * - * -# @ref CLTransposeConvLayerUpsample - * -# @ref CLConvolutionLayer +/** Basic function to compute the deconvolution layer. This function calls the following OpenCL + * kernels/functions: * + * -# @ref CLGEMMDeconvolutionLayer + * -# @ref CLDirectTransposeConvLayer */ class CLTransposeConvLayer : public IFunction { public: - /** Constructor */ + /** Default constructor */ CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayer(const CLTransposeConvLayer &) = delete; - /** Default move constructor */ - CLTransposeConvLayer(CLTransposeConvLayer &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete; - /** Default move assignment operator */ - CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default; + /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, - * and an optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. - * Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: - * Same as @p input. - * @param[out] output Output tensor. The output has the same number of dimensions - * as the @p input. 
- * @param[in] info Contains padding and policies to be used in the - * transpose convolution, this is decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to top edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been - * reshaped with @ref CLWeightsReshapeKernel. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an + * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type + * supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same + * as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions as the + * @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this + * is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, - const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom, + const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); + /** Set the input, weights, biases and output tensors. + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] input Input tensor. 
3 lower dimensions represent a single input, and + * an optional 4th dimension for batch of inputs. Data types supported: + * QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data + * type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: + * Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions as + * the @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref + * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref + * CLWeightsReshapeKernel. + * + */ + void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info, + unsigned int invalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLTransposeConvLayer + * CLTransposeConvLayer + * + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an + * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data + * type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as + * @p input. + * @param[in] output Output tensor info. The output has the same number of dimensions as the + * @p input. 
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is + * described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, - * and an optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. - * Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: - * Same as @p input. - * @param[in] output Output tensor info. The output has the same number of dimensions - * as the @p input. - * @param[in] info Contains padding and policies to be used in the - * transpose convolution, this is decribed in @ref PadStrideInfo. - * @param[in] innvalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to top edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. 
* @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info, - unsigned int innvalid_right, unsigned int invalid_bottom, + const ITensorInfo *bias, ITensorInfo *output, + const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); + static DeconvolutionMethod + get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *bias, ITensorInfo *output, + const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info); // Inherited methods overridden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - CLTransposeConvLayerUpsample _scale_f; - CLConvolutionLayer _conv_f; - CPPFlipWeightsKernel _flip_weights; - CLTensor _scaled_output; - ICLTensor *_original_weights; - CLTensor _weights_flipped; - bool _is_prepared; + std::shared_ptr<IMemoryManager> _memory_manager; + std::unique_ptr<IFunction> _function; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h deleted file mode 100644 index 7570fe76d..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ -#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */ -class CLTransposeConvLayerUpsample : public IFunction -{ -public: - /** Default constructor */ - CLTransposeConvLayerUpsample(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete; - /** Allow instances of this class to be moved */ - CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default; - /** Allow instances of this class to be moved */ - CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default; - /** Default destructor */ - virtual ~CLTransposeConvLayerUpsample() = default; - - /** Initialize the function's source, destination, interpolation type and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] inner_border The number of zeros added to right and top edges of the input. - * @param[in] info Contains padding and policies to be used in the deconvolution. 
- */ - void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, - const PadStrideInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLTransposeConvLayerUpsample - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input. - * @param[in] inner_border The number of zeros added to right and top edges of the input. - * @param[in] info Contains padding and policies to be used in the deconvolution. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const BorderSize &inner_border, const PadStrideInfo &info); - - // Inherited methods overridden: - void run() override; - -private: - CLTransposeConvLayerUpsampleKernel _upsample; - ICLTensor *_output; -}; -} -#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h b/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h deleted file mode 100644 index 666afef4b..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ -#define __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ - -#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref CPPUpsample */ -class CPPUpsampleEx : public ICPPSimpleFunction -{ -public: - /** Configure the upsample CPP kernel - * - * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8 - * @param[out] output The output tensor. 
Data types supported: Same as @p input - * @param[in] info Padding information - */ - void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info); -}; -} -#endif /* __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h index 49504fde3..3fad230f1 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h @@ -18,20 +18,13 @@ #include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h> #include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h> -#include <arm_compute/runtime/NEON/functions/NECast.h> -#include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h> #include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h> #include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h> #include <arm_compute/runtime/NEON/functions/NEGatherEx.h> #include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h> #include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h> -#include <arm_compute/runtime/NEON/functions/NEPReLU.h> -#include <arm_compute/runtime/NEON/functions/NEReduceMeanEx.h> #include <arm_compute/runtime/NEON/functions/NEReduceSum.h> -#include <arm_compute/runtime/NEON/functions/NERNNLayerEx.h> #include <arm_compute/runtime/NEON/functions/NEReduceOperation.h> -#include <arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h> -#include <arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h> #include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h> #endif // __ARM_COMPUTE_NEFUNCTIONSEX_H__ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h deleted file mode 100644 index f0f0d8114..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h 
+++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NECAST_H__ -#define __ARM_COMPUTE_NECAST_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include "arm_compute/core/Types.h" -#include "arm_compute/core/TypesEx.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to run @ref NECastKernel that converts an input tensor to the other types */ -class NECast : public INESimpleFunctionNoBorder -{ -public: - /** Configure the kernel. - * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[out] output Destination tensor with the same dimensions of input. Data type supported: - * U8/S8/QASYMM8/U32/S32/F32. - * @param[in] input_subtype Sub data type of input. - */ - void configure(const ITensor *input, ITensor *output, - SubDataType input_subtype = SubDataType::NONE); - /** Static function to check if given info will lead to a valid configuration of @ref NECast - * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[in] output Output tensor info. Data type supported: U8/S8/QASYMM8/U32/S32/F32. - * @param[in] input_subtype Sub data type of input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - SubDataType input_subtype = SubDataType::NONE); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NECAST_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h deleted file mode 100644 index 005d85add..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ -#define __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEDepthToSpaceLayerKernelEx. 
*/ -class NEDepthToSpaceLayerEx : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEDepthToSpaceLayerEx. - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape x value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h deleted file mode 100644 index 27a38e982..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ -#define __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform negative on an input tensor. */ -class NENegLayer : public INESimpleFunction -{ -public: - /** Initialize the function - * - * @param[in] input Input tensor. Data types supported: F16/F32/S32. - * @param[out] output Output tensor. Data types supported: same as @p input. 
- */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer - * - * @param[in] input First tensor input info. Data types supported: F16/F32/S32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h index 39c57eb70..56548a479 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h @@ -46,7 +46,7 @@ #include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h" #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/runtime/Tensor.h" @@ -164,7 +164,7 @@ private: MemoryGroup _memory_group; NEFullyConnectedHybridLayerReshapeWeights _reshape_weights_function; NEQuantizationSymmetricKernel _quant_input_kernel; - NEGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp; + NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; NEMultiplyScaleFactorKernel _multiply_scale_kernel; NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; Tensor _reshape_weights_output; diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h 
deleted file mode 100644 index d844513c9..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ -#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -// #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" - -#include <memory> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. 
This function calls the following - * NEON kernels if the DOT product instruction is not available: - * - * -# @ref NEGEMMInterleave4x4Kernel - * -# @ref NEGEMMTranspose1xWKernel - * -# @ref NEGEMMLowpMatrixMultiplyKernel - * -# @ref NEGEMMLowpOffsetContributionKernel - * -# @ref NEActivationLayer - * - * otherwise if the DOT product instruction is available: - * - * -# @ref NEGEMMLowpOffsetContributionKernel - * -*/ -class NEGEMMLowpMatrixMultiplyCoreEx : public IFunction -{ -public: - /** Constructor */ - NEGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpMatrixMultiplyCoreEx(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete; - /** Default move constructor */ - NEGEMMLowpMatrixMultiplyCoreEx(NEGEMMLowpMatrixMultiplyCoreEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMLowpMatrixMultiplyCoreEx &operator=(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete; - /** Default move assignment operator */ - NEGEMMLowpMatrixMultiplyCoreEx &operator=(NEGEMMLowpMatrixMultiplyCoreEx &&) = default; - /** Initialise the kernel's inputs, output - * - * @note GEMM_LOWP: low precision GEMM kernel - * This kernel performs the following computations: - * - * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them. - * -# Convert b values from QASYMM8 to int32 add b_offset to each of them. - * -# Compute the matrix product of the resulting a * b in int32. - * - * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is - * QASYMM8/QASYMM8_SIGNED otherwise - * - * @param[in] a First input tensor (Matrix A). Data type supported: - * QASYMM8/QASYMM8_SIGNED. - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor (Matrix C). It can be a nullptr. 
Data type supported: - * S32 - * @param[out] output Output tensor. Data type supported: Data type supported: - * S32/QASYMM8/QASYMM8_SIGNED - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped - * and - * if the reshape of matrix B should be executed only for the first run - */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, - const GEMMInfo &gemm_info = GEMMInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref - * NEGEMMLowpMatrixMultiplyCoreEx - * - * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is - * QASYMM8/QASYMM8_SIGNED otherwise - * - * @param[in] a First input tensor info (Matrix A). Data type supported: - * QASYMM8/QASYMM8_SIGNED. - * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type - * supported: S32 - * @param[in] output Output tensor info. 
Data type supported: Data type supported: - * S32/QASYMM8/QASYMM8_SIGNED - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped - * and - * if the reshape of matrix B should be executed only for the first run - * - * @return a status - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, - const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo()); - - // Inherited methods overridden - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr<INEKernel> _mm_kernel; - std::unique_ptr<INEKernel> _mtx_a_reshape_kernel; - std::unique_ptr<INEKernel> _mtx_b_reshape_kernel; - NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel; - NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; - - Tensor _vector_sum_col; - Tensor _vector_sum_row; - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _mm_result_s32; - Tensor _signed_a; - Tensor _signed_output; - const ITensor *_original_b; - int32_t _a_offset; - int32_t _b_offset; - - bool _run_vector_matrix_multiplication; - bool _assembly_path; - bool _fused_assembly_path; - bool _reshape_b_only_on_first_run; - bool _is_prepared; - bool _fuse_output_stage; - bool _flip_signedness; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h deleted file mode 100644 index ca8413352..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NEPRELU_H__ -#define __ARM_COMPUTE_NEPRELU_H__ - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEPReLUKernel */ -class NEPReLU : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs and output - * - * @param[in] input. Data types supported: QASYMM8/F32. - * @param[in] alpha. Data types supported: Same as @p input. - * @param[out] output Output tensor. Data types supported: Same as @p input. - */ - void configure(const ITensor *input, const ITensor *alpha, ITensor *output); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEPRELU_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h deleted file mode 100644 index 8a7b17946..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NERNNLAYER_EX_H__ -#define __ARM_COMPUTE_NERNNLAYER_EX_H__ - -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGEMM.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Basic function to run @ref NERNNLayerEx */ -class NERNNLayerEx : public IFunction -{ -public: - /** Default constructor */ - NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERNNLayerEx(const NERNNLayerEx &) = delete; - /** Default move constructor */ - NERNNLayerEx(NERNNLayerEx &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERNNLayerEx &operator=(const NERNNLayerEx &) = delete; - /** Default move assignment operator */ - NERNNLayerEx &operator=(NERNNLayerEx &&) = default; - /** Initialize the function - * - * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data - * types supported: F16/F32 - * @param[in] weights Weights tensor of shape [input_size, num_units] that - * multiplies the input. Data types supported: Same as @p input - * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies - * the current 'state'. Data types supported: Same as @p input - * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same - * as @p input - * @param[out] output Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. 
Data types - * supported: Same as @p input - * @param[in] info Activation layer parameter. - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights, - const ITensor *bias, ITensor *hidden_state, ITensor *output, - ActivationLayerInfo &info); - /** Initialize the function - * - * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data - * types supported: F16/F32 - * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies - * the input. Data types supported: Same as @p input - * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the - * current 'state'. Data types supported: Same as @p input - * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p - * input - * @param[in] output Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types - * supported: Same as @p input - * @param[in] info Activation layer parameter. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, - const ITensorInfo *recurrent_weights, const ITensorInfo *bias, - const ITensorInfo *hidden_state, const ITensorInfo *output, - const ActivationLayerInfo &info); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - NEGEMM _gemm_state_f; - NEArithmeticAdditionKernel _add_kernel; - NEActivationLayerKernel _activation_kernel; - NEFullyConnectedLayer _fully_connected_kernel; - NECopyKernel _copy_kernel; - Tensor _fully_connected_out; - Tensor _gemm_output; - Tensor _add_output; - bool _is_prepared; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NERNNLAYER_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h deleted file mode 100644 index 03ac45798..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ -#define __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" -#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform reduce operation */ -class NEReduceMeanEx : public IFunction -{ -public: - /** Constructor */ - NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. 
Data type supported: QASYMM8/F16/F32 - * @param[in] reduction_axis Reduction axis vector. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[out] output Destination tensor. Data type supported: Same as @p input - */ - void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, - ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref - * NEReduceMeanEx - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 - * @param[in] reduction_axis Reduction axis vector. - * @param[in] keep_dims If positive, retains reduced dimensions with length 1. - * @param[in] output Destination tensor. Data type supported: Same as @p input - * - * @return A status - */ - static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis, - bool keep_dims, const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::unique_ptr<NEReductionOperation[]> _reduction_kernels{nullptr}; - std::unique_ptr<Tensor[]> _reduced_outs{nullptr}; - NEReshapeLayer _reshape; - unsigned int _reduction_ops; - bool _keep_dims; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h deleted file mode 100644 index 3b695fbc0..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ -#define __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to spatial divide a tensor. This function calls the following NEON - * kernels/functions: - * - * -# @ref NEMemsetKernel - * -# @ref NESpaceToBatchLayerKernel - */ -class NESpaceToBatchLayerEx : public IFunction -{ -public: - /** Default constructor */ - NESpaceToBatchLayerEx(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerEx(const NESpaceToBatchLayerEx &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerEx &operator=(const NESpaceToBatchLayerEx &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerEx(NESpaceToBatchLayerEx &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerEx &operator=(NESpaceToBatchLayerEx &&) = default; - /** Default destructor */ - virtual ~NESpaceToBatchLayerEx() = default; - /** Set the input and output tensors. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, - ITensor *output); - /** Set the input and output tensors. (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. 
Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, - const Size2D &padding_left, const Size2D &padding_right, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * NESpaceToBatchLayerEx - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32 - * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32 - * @param[in] output Tensor output info. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, - const ITensorInfo *paddings, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref - * NESpaceToBatchLayerEx (Static block shape and paddings) - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[in] output Tensor output info. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, - const Size2D &padding_left, const Size2D &padding_right, - const ITensorInfo *output); - - // Inherited methods overridden: - void run() override; - -private: - NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */ - NEMemsetKernel _memset_kernel; /**< Memset kernel to run */ - bool _has_padding; /**< Flag to check if the output has padding */ -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h deleted file mode 100644 index 9f32616f3..000000000 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ -#define __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** This function calls the following NEON kernels/functions: - * - * -# @ref NESpaceToDepthLayerKernelEx - */ -class NESpaceToDepthLayerEx : public INESimpleFunctionNoBorder -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Tensor output. 
Data types supported: same as @p input - * @param[in] block_shape Block shape value - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref - * NESpaceToDepthLayerEx (Static block shape and paddings) - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: - * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h index 408d150d0..24ff5dac9 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h @@ -15,7 +15,7 @@ */ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,16 +37,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ - #ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ #define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ -#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h" +#include "arm_compute/runtime/CPP/functions/CPPUpsample.h" #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEPermute.h" +#include "arm_compute/runtime/NEON/functions/NEReverse.h" -#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -59,8 +57,8 @@ namespace arm_compute { /** Function to run the deconvolution layer. * - * Transpose convolution Layer is the backward pass of Convolution Layer. First we transform the - * input depending on the stride and pad info and then perfrom a 1x1 + * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input + * depending on the stride and pad info and then perfrom a 1x1 * convolution pass. Input stride defines how many zeroes we should put between each element of the * input, pad is the amount of padding and finaly a is a user * specified value where a < stride - 1 that increases the padding top and right of the input image. @@ -81,21 +79,22 @@ namespace arm_compute * kernel_x and kernel_y are the convolution sizes in x and y. * stride_x and stride_y is the input stride of the first and second dimension. * - * The weights used by Transpose convolution are supposed to be the same as the ones used for - * Convolution. Therefore, it will be necessary to use the weights in the - * reverse order to perform an actual convolution. This is achieved by using the @ref - * CPPFlipWeightsKernel. + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. 
+ * Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse. * * This function calls the following NEON kernels/functions: * - * -# @ref CPPUpsample + * -# @ref CPPUpsampleEx * -# @ref NEConvolutionLayer + * -# @ref NEPermute + * -# @ref NEReverse * */ class NETransposeConvLayer : public IFunction { public: - /** Default constructor */ + /** Constructor */ NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -112,37 +111,38 @@ public: /** Set the input, weights, biases and output tensors. * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. + * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. + * supported: Same as @p input. * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type - * supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. + * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 + * for F16 input. * @param[out] output Output tensor. The output has the same number of dimensions as the @p - * input. + * input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to top edge of the output. + * decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. 
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. * */ void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom); /** Static function to check if given info will lead to a valid configuration of @ref - * NETransposeConvLayer + * NETransposeConvLayer * * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. + * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. + * supported: Same as @p input. * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types - * supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. + * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. * @param[in] output Output tensor info. The output has the same number of dimensions as the @p - * input. + * input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] innvalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to top edge of the output. + * decribed in @ref PadStrideInfo. + * @param[in] innvalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
* * @return a status */ @@ -158,17 +158,11 @@ public: private: MemoryGroup _memory_group; NEConvolutionLayer _conv_f; - CPPUpsampleEx _upsample_f; - CPPFlipWeightsKernel _flip_weights; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; + CPPUpsample _upsample_f; + NEReverse _flip_weights; Tensor _scaled_output; Tensor _weights_flipped; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_nchw; + Tensor _flip_axis; const ITensor *_original_weights; ITensor *_input; PadStrideInfo _info; |