diff options
Diffstat (limited to 'runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions')
21 files changed, 1464 insertions, 0 deletions
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h new file mode 100644 index 000000000..d9d0d4d35 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file CLArgOperation.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLArgOperation class + */ + +#ifndef __ARM_COMPUTE_CLARGOPERATION_H__ +#define __ARM_COMPUTE_CLARGOPERATION_H__ + +#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to execute CLArgOperation operation + */ +class CLArgOperation : public IFunction +{ +public: + /** + * @brief Construct a new CLArgOperation object + */ + CLArgOperation(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ + CLArgOperation(const CLArgOperation &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ + CLArgOperation &operator=(const CLArgOperation &) = delete; + + /** + * @brief Construct a new CLArgOperation object by using copy constructor + * @param[in] CLArgOperation object to move + */ + CLArgOperation(CLArgOperation &&) = default; + + /** + * @brief Assign a CLArgOperation object. + * @param[in] CLArgOperation object to assign. This object will be moved. + */ + CLArgOperation &operator=(CLArgOperation &&) = default; + + /** + * @brief Initialise the kernel's inputs and outputs. + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32. + * @param[out] output The result of arg operation. Data types supported: S32. + * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. + * @param[in] op Arg operation to perform. + * @return N/A + */ + void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, ArgOperation op); + + /** + * @brief Static function to check if given info will lead to a valid configuration + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32. 
+ * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. + * @param[out] output The result of arg operation. Data types supported: S32. + * @param[in] op Arg operation to perform. + * @return a status + */ + static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &axis, + const ITensorInfo *output, ArgOperation op); + /** + * @brief Run the OpenCL kernel for this operation + * @return N/A + */ + void run() override; + +private: + ICLTensor *_input{nullptr}; + ICLTensor *_output{nullptr}; + std::vector<uint32_t> _axis{}; + ArgOperation _arg_op{ArgOperation::MAX}; + + std::unique_ptr<CLTensor[]> _interm_tensors{nullptr}; + std::unique_ptr<CLArgOperationKernel[]> _argop_kernels{nullptr}; + size_t _num_of_kernels{0}; +}; +} +#endif /*__ARM_COMPUTE_CLARGOPERATION_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h new file mode 100644 index 000000000..d16a0762d --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ +#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBatchToSpaceNDKernel + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. + * @note The function converts the input tensor to the tensor of the output tensor's type. + */ +class CLBatchToSpaceND : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[in] block_size A pointer to an array of integer values specifying block sizes + * for spatial dimension. + */ + void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size); +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h new file mode 100644 index 000000000..061e34f26 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLBINARYLOGICALOP_H__ +#define __ARM_COMPUTE_CLBINARYLOGICALOP_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/core/TypesEx.h" + +namespace arm_compute +{ +class ICLTensor; + +class CLBinaryLogicalOp : public ICLSimpleFunction +{ +public: + /** Initialise the function's source and destination. + * + * @param[in] input1 Source tensor1. Data types supported: U8, QASYMM8. + * @param[in] input2 Source tensor2. Data types supported: U8 QASYMM8. + * @param[out] output Output tensor. Data types supported: U8, QASYMM8. + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, + BinaryLogicalOperation op); +}; + +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLBINARYLOGICALOP_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h new file mode 100644 index 000000000..56b8408e2 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file CLCast.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLCast class + */ + +#ifndef __ARM_COMPUTE_CLCAST_H__ +#define __ARM_COMPUTE_CLCAST_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to run @ref CLCastKernel. + * This converts the input tensor to the tensor of the output tensor's type. + */ +class CLCast : public ICLSimpleFunction +{ +public: + /** + * @brief Initialise the kernel's input and output + * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be + * modified inside the kernel. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + */ + void configure(ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLCAST_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h new file mode 100644 index 000000000..d78a6ada4 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__ +#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLDepthToSpaceKernel + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. + * @note The function converts the input tensor to the tensor of the output tensor's type. + */ +class CLDepthToSpace : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[block_size] block size integer only + */ + void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size); +}; +} // namesace arm_compute + +#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h new file mode 100644 index 000000000..257772a89 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file CLEmbeddingLookup.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLEmbeddingLookup class + */ + +#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ +#define __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include <vector> + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to perform EmbeddingLookup operation + */ +class CLEmbeddingLookup : public ICLSimpleFunction +{ +public: + /** + * @brief Set the input and output tensors. + * @param[in] input Source tensor. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p + * input. + * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of + * input. + * @return N/A + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups); +}; +} +#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h new file mode 100644 index 000000000..0867cf6bb --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLFullyConnectedReshapingLayer.h + * @brief This file contains CLFullyConnectedReshapingLayer class + * @ingroup COM_AI_RUNTIME + */ + +#ifndef __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__ +#define __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__ + +#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h> +#include <arm_compute/runtime/misc/functions/GenericReshapeLayer.h> +#include <arm_compute/runtime/IMemoryManager.h> + +namespace arm_compute +{ +/** + * @brief Class to run FullyConnected Layer after reshaping input tensor + */ +class CLFullyConnectedReshapingLayer : public arm_compute::IFunction +{ +public: + CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr) + : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, + _cl_fc{memory_manager}, _cl_reshape{}, _needs_reshape(false) + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] input The source tensor + * @param[in] weights The tensor that is filled with weight values + * @param[in] biases The tensor that is filled with biase values + * @param[in] output The destination tensor + * @param[in] needs_reshape Whether it needs to be reshaped or not + * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true. + * @return N/A + */ + void configure(const arm_compute::ICLTensor *input, const arm_compute::ICLTensor *weights, + const arm_compute::ICLTensor *biases, arm_compute::ICLTensor *output, + bool needs_reshape, const arm_compute::TensorShape &reshape); + +public: + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ + void run(void) override; + +private: + const arm_compute::ICLTensor *_input; + const arm_compute::ICLTensor *_weights; + const arm_compute::ICLTensor *_biases; + arm_compute::ICLTensor *_output; + + // buffer for reshaping input tensor + arm_compute::CLTensor _cl_buffer; + +private: + arm_compute::CLFullyConnectedLayer _cl_fc; + // TODO Change to CLReshapeLayer + arm_compute::misc::GenericReshapeLayer _cl_reshape; + bool _needs_reshape; +}; +} // namespace arm_compute + +#endif // __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h new file mode 100644 index 000000000..04d227aa7 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLGatherEx.h + * @brief This file contains CLGatherEx class + * @ingroup COM_AI_RUNTIME + */ + +#ifndef __ARM_COMPUTE_CLGATHEREX_H__ +#define __ARM_COMPUTE_CLGATHEREX_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to to run @ref CLGatherKernel. 
+ */ +class CLGatherEx : public ICLSimpleFunction +{ +public: + /** + * @brief Initialise the kernel's inputs, output and convertion policy. + * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. + * @param[in] indices An indexes tensor. Data types supported: S32. + * @param[out] output The output tensor, Data types supported: same as @p input. + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 + * @return N/A + */ + void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + + /** + * @brief Static function to check if given info will lead to a valid configuration + * of @ref CLGatherEx + * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32. + * @param[in] indices An indexes tensor. Data types supported: S32. + * @param[out] output The output tensor, Data types supported: same as @p input. + * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *indices, + const ITensorInfo *output, int axis = 0); +}; +} +#endif /*__ARM_COMPUTE_CLGATHEREX_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h new file mode 100644 index 000000000..65aa6cbd5 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLHashtableLookup.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLHashtableLookup class + */ + +#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUP_H__ +#define __ARM_COMPUTE_CLHASHTABLELOOKUP_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include <vector> + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to perform HashtableLookup operation + */ +class CLHashtableLookup : public ICLSimpleFunction +{ +public: + /** + * @brief Set the input and output tensors. + * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of + * input. + * @param[in] keys Keys 1D tensor. keys and input pair represent a map. + * Data types supported: S32 + * @param[in] input Source tensor. + * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p + * input. + * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits + * (True) or not (False). 
Data types supported: U8/QASYMM8 + * @return N/A + */ + void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *intput, + ICLTensor *output, ICLTensor *hits); +}; +} +#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h new file mode 100644 index 000000000..4bf203c5a --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLLOGICALNOT_H__ +#define __ARM_COMPUTE_CLLOGICALNOT_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +class CLLogicalNot : public ICLSimpleFunction +{ +public: + /** Initialise the function's source and destination. + * + * @param[in] input Source tensor. Data types supported: QASYMM8. + * @param[out] output Output tensor. Data types supported: QASYMM8. 
+ */ + void configure(ICLTensor *input, ICLTensor *output); +}; + +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLLOGICALNOT_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h new file mode 100644 index 000000000..198a0fd4e --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLNEG_H__ +#define __ARM_COMPUTE_CLNEG_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +class CLNeg : public ICLSimpleFunction +{ +public: + /** Initialise the function's source and destination. + * + * @param[in] input Source tensor. Data types supported: + * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input. 
+ * + */ + void configure(ICLTensor *input, ICLTensor *output); +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLNEG_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h new file mode 100644 index 000000000..622a61b5e --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLPRELU_H__ +#define __ARM_COMPUTE_CLPRELU_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +class CLPReLU : public ICLSimpleFunction +{ +public: + /** Initialise the function's source and destination. + * + * @param[in] input. Data types supported: + * QASYMM8/F16/F32. + * @param[in] alpha. Data types supported: + * QASYMM8/F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input. 
+ */ + void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output); +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLPRELU_H__*/ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h new file mode 100644 index 000000000..b142d3a2e --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLPixelWiseDivision.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLPixelWiseDivision class + */ +#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__ +#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to run @ref CLPixelWiseDivisionKernel. + */ +class CLPixelWiseDivision : public ICLSimpleFunction +{ +public: + /** + * @brief Initialise the kernel's inputs, output and convertion policy. + * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32 + * The input tensor is [in, out] because its TensorInfo might be + * modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 An input tensor. 
Data types supported: same as @p input1. + * The input tensor is [in, out] because its TensorInfo might be + * modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output The output tensor, Data types supported: same as @p input1. + * Note: U8 requires both inputs to be U8. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or + * 1/2^n where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest + * even. + * @return N/A + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f, + ConvertPolicy overflow_policy = ConvertPolicy::WRAP, + RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO); + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLPixelWiseDivision + * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32 + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. + * @param[in] output The output tensor info, Data types supported: same as @p input1. + * Note: U8 requires both inputs to be U8. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n + * where n is between 0 and 15. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. 
+ * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, + const ITensorInfo *output, float scale = 1.f, + ConvertPolicy overflow_policy = ConvertPolicy::WRAP, + RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO); +}; +} +#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h new file mode 100644 index 000000000..7e88cb369 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLRNN_LAYER_EX_H__ +#define __ARM_COMPUTE_CLRNN_LAYER_EX_H__ + +#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLCopyKernel.h" +#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLRNNLayerEx */ +class CLRNNLayerEx : public IFunction +{ +public: + /** Default constructor */ + CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Initialize the function + * + * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data + * types supported: F16/F32 + * @param[in] weights Weights tensor of shape [input_size, num_units] that + * multiplies the input. Data types supported: Same as @p input + * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies + * the current 'state'. Data types supported: Same as @p input + * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same + * as @p input + * @param[out] output Output tensor of shape [num_units, batch_size]. Data types + * supported: Same as @p input + * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types + * supported: Same as @p input + * @param[in] info Activation layer parameter. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, + const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state, + ICLTensor *output, ActivationLayerInfo &info); + /** Initialize the function + * + * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data + * types supported: F16/F32 + * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies + * the input. 
Data types supported: Same as @p input + * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the + * current 'state'. Data types supported: Same as @p input + * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p + * input + * @param[in] output Output tensor of shape [num_units, batch_size]. Data types + * supported: Same as @p input + * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types + * supported: Same as @p input + * @param[in] info Activation layer parameter. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *recurrent_weights, const ITensorInfo *bias, + const ITensorInfo *hidden_state, const ITensorInfo *output, + const ActivationLayerInfo &info); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + CLMemoryGroup _memory_group; + CLGEMM _gemm_state_f; + CLSaturatedArithmeticOperationKernel _add_kernel; + CLActivationLayerKernel _activation_kernel; + CLFullyConnectedLayer _fully_connected_kernel; + CLCopyKernel _copy_kernel; + CLTensor _fully_connected_out; + CLTensor _gemm_output; + CLTensor _add_output; + bool _is_prepared; +}; +} +#endif /* __ARM_COMPUTE_CLRNN_LAYER_EX_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h new file mode 100644 index 000000000..e1a6f6ab4 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLReduceOperation.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLReduceOperation class + */ + +#ifndef __ARM_COMPUTE_CLREDUCEOPERATION_H__ +#define __ARM_COMPUTE_CLREDUCEOPERATION_H__ + +#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" +#include "arm_compute/core/TypesEx.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to perform ReduceOperation + */ +class CLReduceOperation : public IFunction +{ +public: + /** + * @brief Construct a new ReduceOperation object + */ + CLReduceOperation(); + + /** + * @brief Set the input and output tensors. + * @param[in] input Source tensor. Data types supported: U8/S32/F32 + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p + * input. + * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. + * @param[in] op Reduce operation to perform. + * @return N/A + */ + void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis, + ReduceOperation op); + + /** + * @brief Static function to check if given info will lead to a valid configuration of @ref + * CLReduceOperation. + * @param[in] input Source tensor info. Data types supported: U8/S32/F32 + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p + * input. + * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates. 
+ * @param[in] op Reduce operation to perform. + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const std::set<uint32_t> &axis, const ReduceOperation &op); + + /** + * @brief Run the OpenCL kernel for this operation + * @return N/A + */ + void run() override; + +private: + ICLTensor *_input; + ICLTensor *_output; + std::set<uint32_t> _axis; + + std::unique_ptr<CLTensor[]> _interm_tensors{nullptr}; + std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr}; +}; +} +#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h new file mode 100644 index 000000000..7e2df8986 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ +#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLSpaceToBatchNDKernel + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/S32/F32. 
+ * @note The function divides "spatial" dimensions of the input into a grid of blocks of shape + * block_shape, and interleaves these blocks with the "batch" dimension such that in the output. + */ +class CLSpaceToBatchND : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's input and output. + * + * @note The data layout of input and output must be the same. + * @note The number of dimensions of input and output must be 4, and `spatial` dimensions + * are height and width. + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32. + * Data layout supported: NCHW/NHWC + * @param[in] block_size Tensor of integer values specifying block sizes for spatial + * dimension. + * Data types supported: S32 + * @param[in] padding_size Tensor of integer values specifying padding sizes for spatial + * dimension. + * Data types supported: S32 + * @param[out] output Output tensor. Data types supported: same as @p input. + * Data layout supported: NCHW/NHWC + */ + void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size, + ICLTensor *output); +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h new file mode 100644 index 000000000..17f762092 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__ +#define __ARM_COMPUTE_CLSPACETODEPTH_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLSpaceToDepthKernel + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32. + * @note The function converts the input tensor to the tensor of the output tensor's type. + */ +class CLSpaceToDepth : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. + * @param[in] block_size Block size (integer only) + */ + void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size); +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h new file mode 100644 index 000000000..6b26a85c8 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CLStridedSlice.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLStridedSlice and arm_compute::CLStridedSliceCPU class + */ + +#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ +#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to run @ref CLStridedSliceKernel + */ +class CLStridedSliceEx : public ICLSimpleFunction +{ +public: + /** + * @brief Initialise the kernel's inputs and outputs + * @param[in] input Tensor input. Data type supported: + * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[out] output Output tensor. 
Data type supported: Same as @p input + * @param[in] beginData 'begin' vector of strided slice operation + * @param[in] endData 'end' vector of strided slice operation + * @param[in] stridesData 'strides' vector of strided slice operation + * @param[in] beginMask If the ith bit is set, begin[i] is ignored + * @param[in] endMask If the ith bit is set, end[i] is ignored + * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the + * dimensionality by 1, taking on the value at index begin[i] + * @return N/A + */ + void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, + ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask, + int32_t shrinkAxisMask); +}; +} +#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h new file mode 100644 index 000000000..5327e016f --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file CLTopKV2.h + * @ingroup COM_AI_RUNTIME + * @brief This file contains arm_compute::CLTopKV2 class + */ +#ifndef __ARM_COMPUTE_CLTOPK_V2_H__ +#define __ARM_COMPUTE_CLTOPK_V2_H__ + +#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h" + +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** + * @brief Class to execute TopKV2 operation. + */ +class CLTopKV2 : public IFunction +{ +public: + /** + * @brief Construct a new CLTopKV2 object + */ + CLTopKV2(); + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ + CLTopKV2(const CLTopKV2 &) = delete; + + /** + * @brief Prevent instances of this class from being copied (As this class contains pointers) + */ + CLTopKV2 &operator=(const CLTopKV2 &) = delete; + + /** + * @brief Construct a new CLTopKV2 object by using copy constructor + * @param[in] CLTopKV2 object to move + */ + CLTopKV2(CLTopKV2 &&) = default; + + /** + * @brief Assign a CLTopKV2 object. + * @param[in] CLTopKV2 object to assign. This object will be moved. + */ + CLTopKV2 &operator=(CLTopKV2 &&) = default; + + /** + * @brief Initialise the kernel's inputs and outputs. + * @param[in] input Input image. Data types supported: U8/S16/F32. + * @param[in] k The value of `k`. + * @param[out] values Top k values. Data types supported: S32 if input type is U8/S16, F32 if + * input type is F32. + * @param[out] indices Indices related to top k values. Data types supported: S32 if input type + * is U8/S16, F32 if input type is F32. + * @return N/A + */ + void configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices, + int total_bits = 32, int bits = 4); + + /** + * @brief Run the kernels contained in the function + * Depending on the value of the following environment variables it works differently: + * - If the value of environment variable "ACL_TOPKV2" == "GPU_SINGLE", + * quick sort on GPU is used. 
+ * - If the value of environment variable "ACL_TOPKV2" == "GPU", + * radix sort on GPU is used. + * - For other value, TopKV2 runs on CPU + * @return N/A + */ + void run() override; + +private: + void run_on_cpu(); + void run_on_gpu(); + void run_on_gpu_single_quicksort(); + + uint32_t _k; + uint32_t _total_bits; + uint32_t _bits; + uint32_t _radix; + uint32_t _hist_buf_size; + uint32_t _glob_sum_buf_size; + uint32_t _n; + + ICLTensor *_input; + ICLTensor *_values; + ICLTensor *_indices; + + cl::Buffer _qs_idx_buf; + cl::Buffer _qs_temp_buf; + cl::Buffer _hist_buf; + cl::Buffer _glob_sum_buf; + cl::Buffer _temp_buf; + cl::Buffer _first_negative_idx_buf; + cl::Buffer _in_key_buf; + cl::Buffer _out_key_buf; + cl::Buffer _in_ind_buf; + cl::Buffer _out_ind_buf; + + cl::Buffer *_p_in_key_buf; + cl::Buffer *_p_out_key_buf; + cl::Buffer *_p_in_ind_buf; + cl::Buffer *_p_out_ind_buf; + + CLTopKV2Single _qs_kernel; + CLTopKV2Init _init_kernel; + CLRadixSortHistogram _hist_kernel; + CLRadixSortScanHistogram _scan_hist_kernel; + CLRadixSortGlobalScanHistogram _glob_scan_hist_kernel; + CLRadixSortPasteHistogram _paste_hist_kernel; + CLRadixSortReorder _reorder_kernel; + CLTopKV2FindFirstNegative _find_first_negative_kernel; + CLTopKV2ReorderNegatives _reorder_negatives_kernel; + CLTopKV2Store _store_kernel; +}; +} +#endif // __ARM_COMPUTE_CLTOPK_V2_H__ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h new file mode 100644 index 000000000..340a7bfe9 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ +#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ + +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h" + +#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" + +#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +#include <memory> + +namespace arm_compute +{ +class ICLTensor; +/** Function to run the transpose convolution layer. + * + * @note This layer was copied in order to fix a bug computing to wrong output dimensions. + * + * TransposeConv Layer is the backward pass of Convolution Layer. 
First we transform the input + depending on the stride and pad info and then perform a 1x1 + * convolution pass. Input stride defines how many zeroes we should put between each element of the + * input, pad is the amount of padding and finally a is a user + * specified value where a < stride - 1, that increases the padding top and right of the input + * image. + * + * The relation between input to output is as follows: + * \f[ + * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x + * \f] + * \f[ + * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y + * \f] + * + * where: + * width_input is the size of the first input dimension. + * height_input is the size of the second input dimension. + * width_output is the size of the first output dimension. + * height_output is the size of the second output dimension. + * kernel_x and kernel_y are the convolution sizes in x and y. + * stride_x and stride_y are the input strides of the first and second dimension. + * + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. + * Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using the @ref + * CPPFlipWeightsKernel. 
+ * + * This function calls the following OpenCL kernels/functions: + * + * -# @ref CLTransposeConvLayerUpsample + * -# @ref CLConvolutionLayer + * + */ +class CLTransposeConvLayer : public IFunction +{ +public: + /** Constructor */ + CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayer(const CLTransposeConvLayer &) = delete; + /** Default move constructor */ + CLTransposeConvLayer(CLTransposeConvLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete; + /** Default move assignment operator */ + CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default; + /** Set the input, weights, biases and output tensors. + * + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: + * Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions + * as the @p input. + * @param[in] info Contains padding and policies to be used in the + * transpose convolution, this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref + * CLConvolutionLayer, specifies if the weights tensor has been + * reshaped with @ref CLWeightsReshapeKernel. 
+ */ + void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, + const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLTransposeConvLayer + * + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: + * Same as @p input. + * @param[in] output Output tensor info. The output has the same number of dimensions + * as the @p input. + * @param[in] info Contains padding and policies to be used in the + * transpose convolution, this is described in @ref PadStrideInfo. + * @param[in] innvalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with @ref + * CLWeightsReshapeKernel. 
+ * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info, + unsigned int innvalid_right, unsigned int invalid_bottom, + const WeightsInfo &weights_info = WeightsInfo()); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + CLMemoryGroup _memory_group; + CLTransposeConvLayerUpsample _scale_f; + CLConvolutionLayer _conv_f; + CPPFlipWeightsKernel _flip_weights; + CLTensor _scaled_output; + ICLTensor *_original_weights; + CLTensor _weights_flipped; + bool _is_prepared; +}; +} +#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */ diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h new file mode 100644 index 000000000..4ae0e1830 --- /dev/null +++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ +#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */ +class CLTransposeConvLayerUpsample : public IFunction +{ +public: + /** Default constructor */ + CLTransposeConvLayerUpsample(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete; + /** Allow instances of this class to be moved */ + CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default; + /** Allow instances of this class to be moved */ + CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default; + /** Default destructor */ + virtual ~CLTransposeConvLayerUpsample() = default; + + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] inner_border The number of zeros added to right and top edges of the input. + * @param[in] info Contains padding and policies to be used in the deconvolution. 
+ */ + void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, + const PadStrideInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref + * CLTransposeConvLayerUpsample + * + * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input. + * @param[in] inner_border The number of zeros added to right and top edges of the input. + * @param[in] info Contains padding and policies to be used in the deconvolution. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const BorderSize &inner_border, const PadStrideInfo &info); + + // Inherited methods overridden: + void run() override; + +private: + CLTransposeConvLayerUpsampleKernel _upsample; + ICLTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */ |