summaryrefslogtreecommitdiff
path: root/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions')
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h106
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h45
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h41
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h50
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h44
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h54
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h84
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h62
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h59
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h38
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h39
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h40
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h80
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h104
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h87
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h56
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h44
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h58
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h137
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h157
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h79
21 files changed, 1464 insertions, 0 deletions
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h
new file mode 100644
index 000000000..d9d0d4d35
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLArgOperation.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLArgOperation class
+ */
+
+#ifndef __ARM_COMPUTE_CLARGOPERATION_H__
+#define __ARM_COMPUTE_CLARGOPERATION_H__
+
+#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to execute CLArgOperation operation
+ */
+class CLArgOperation : public IFunction
+{
+public:
+ /**
+ * @brief Construct a new CLArgOperation object
+ */
+ CLArgOperation();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLArgOperation(const CLArgOperation &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLArgOperation &operator=(const CLArgOperation &) = delete;
+
+ /**
+ * @brief Construct a new CLArgOperation object by using move constructor
+ * @param[in] CLArgOperation object to move
+ */
+ CLArgOperation(CLArgOperation &&) = default;
+
+ /**
+ * @brief Move-assign a CLArgOperation object.
+ * @param[in] CLArgOperation object to assign. This object will be moved.
+ */
+ CLArgOperation &operator=(CLArgOperation &&) = default;
+
+ /**
+ * @brief Initialise the kernel's inputs and outputs.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[out] output The result of arg operation. Data types supported: S32.
+ * @param[in] axis Axis along which to reduce. It must be sorted and contain no duplicates.
+ * @param[in] op Arg operation to perform.
+ * @return N/A
+ */
+ void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, ArgOperation op);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] axis Axis along which to reduce. It must be sorted and contain no duplicates.
+ * @param[out] output The result of arg operation. Data types supported: S32.
+ * @param[in] op Arg operation to perform.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &axis,
+ const ITensorInfo *output, ArgOperation op);
+ /**
+ * @brief Run the OpenCL kernel for this operation
+ * @return N/A
+ */
+ void run() override;
+
+private:
+ ICLTensor *_input{nullptr};
+ ICLTensor *_output{nullptr};
+ std::vector<uint32_t> _axis{};
+ ArgOperation _arg_op{ArgOperation::MAX};
+
+ std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
+ std::unique_ptr<CLArgOperationKernel[]> _argop_kernels{nullptr};
+ size_t _num_of_kernels{0};
+};
+}
+#endif /*__ARM_COMPUTE_CLARGOPERATION_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
new file mode 100644
index 000000000..d16a0762d
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
+#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBatchToSpaceNDKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
+ * @note The function converts the input tensor to the tensor of the output tensor's type.
+ */
+class CLBatchToSpaceND : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input, output and block sizes.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size A pointer to an array of integer values specifying the block sizes
+ * for the spatial dimensions.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size);
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
new file mode 100644
index 000000000..061e34f26
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLBINARYLOGICALOP_H__
+#define __ARM_COMPUTE_CLBINARYLOGICALOP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLBinaryLogicalOp : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's sources, destination and operation.
+ * @param[in] input1 Source tensor1. Data types supported: U8, QASYMM8.
+ * @param[in] input2 Source tensor2. Data types supported: U8, QASYMM8.
+ * @param[out] output Output tensor. Data types supported: U8, QASYMM8.
+ * @param[in] op Binary logical operation to perform (see BinaryLogicalOperation).
+ */
+ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+ BinaryLogicalOperation op);
+};
+
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLBINARYLOGICALOP_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
new file mode 100644
index 000000000..56b8408e2
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLCast.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLCast class
+ */
+
+#ifndef __ARM_COMPUTE_CLCAST_H__
+#define __ARM_COMPUTE_CLCAST_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to run @ref CLCastKernel.
+ * This converts the input tensor to the data type of the output tensor.
+ */
+class CLCast : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Initialise the kernel's input and output
+ * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * The input tensor is [in, out] because its TensorInfo might be
+ * modified inside the kernel.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ */
+ void configure(ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLCAST_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
new file mode 100644
index 000000000..d78a6ada4
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__
+#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDepthToSpaceKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
+ * @note The function converts the input tensor to the tensor of the output tensor's type.
+ */
+class CLDepthToSpace : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input, output and block size.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size Block size (a single integer value).
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
+};
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
new file mode 100644
index 000000000..257772a89
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLEmbeddingLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLEmbeddingLookup class
+ */
+
+#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__
+#define __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform EmbeddingLookup operation
+ */
+class CLEmbeddingLookup : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Set the input, output and lookups tensors.
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] lookups Lookups 1D tensor whose values are indices into the first dimension of
+ * input.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
+};
+}
+#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
new file mode 100644
index 000000000..0867cf6bb
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        CLFullyConnectedReshapingLayer.h
+ * @brief       This file contains CLFullyConnectedReshapingLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__
+#define __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__
+
+#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
+#include <arm_compute/runtime/misc/functions/GenericReshapeLayer.h>
+#include <arm_compute/runtime/IMemoryManager.h>
+
+namespace arm_compute
+{
+/**
+ * @brief Class to run FullyConnected Layer after reshaping input tensor
+ */
+class CLFullyConnectedReshapingLayer : public arm_compute::IFunction
+{
+public:
+ CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
+ _cl_fc{memory_manager}, _cl_reshape{}, _needs_reshape(false)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] input The source tensor
+ * @param[in] weights The tensor that is filled with weight values
+ * @param[in] biases The tensor that is filled with bias values
+ * @param[out] output The destination tensor
+ * @param[in] needs_reshape Whether it needs to be reshaped or not
+ * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true.
+ * @return N/A
+ */
+ void configure(const arm_compute::ICLTensor *input, const arm_compute::ICLTensor *weights,
+ const arm_compute::ICLTensor *biases, arm_compute::ICLTensor *output,
+ bool needs_reshape, const arm_compute::TensorShape &reshape);
+
+public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
+ void run(void) override;
+
+private:
+ const arm_compute::ICLTensor *_input;
+ const arm_compute::ICLTensor *_weights;
+ const arm_compute::ICLTensor *_biases;
+ arm_compute::ICLTensor *_output;
+
+ // buffer for reshaping input tensor
+ arm_compute::CLTensor _cl_buffer;
+
+private:
+ arm_compute::CLFullyConnectedLayer _cl_fc;
+ // TODO Change to CLReshapeLayer
+ arm_compute::misc::GenericReshapeLayer _cl_reshape;
+ bool _needs_reshape;
+};
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
new file mode 100644
index 000000000..04d227aa7
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLGatherEx.h
+ * @brief This file contains CLGatherEx class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CLGATHEREX_H__
+#define __ARM_COMPUTE_CLGATHEREX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to run @ref CLGatherKernel.
+ */
+class CLGatherEx : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Initialise the kernel's inputs, output and gather axis.
+ * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] indices An indices tensor. Data types supported: S32.
+ * @param[out] output The output tensor. Data types supported: same as @p input.
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration
+ * of @ref CLGatherEx
+ * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] indices An indices tensor info. Data types supported: S32.
+ * @param[in] output The output tensor info. Data types supported: same as @p input.
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
+ const ITensorInfo *output, int axis = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLGATHEREX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
new file mode 100644
index 000000000..65aa6cbd5
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLHashtableLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLHashtableLookup class
+ */
+
+#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUP_H__
+#define __ARM_COMPUTE_CLHASHTABLELOOKUP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform HashtableLookup operation
+ */
+class CLHashtableLookup : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Set the lookups, keys, input, output and hits tensors.
+ * @param[in] lookups Lookups 1D tensor whose values are indices into the first dimension of
+ * input.
+ * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
+ * (True) or not (False). Data types supported: U8/QASYMM8
+ * @return N/A
+ */
+ void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input,
+ ICLTensor *output, ICLTensor *hits);
+};
+}
+#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h
new file mode 100644
index 000000000..4bf203c5a
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLLOGICALNOT_H__
+#define __ARM_COMPUTE_CLLOGICALNOT_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLLogicalNot : public ICLSimpleFunction
+{
+public:
+ /** Initialise the source and destination tensors of the CLLogicalNot function.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8.
+ * @param[out] output Output tensor. Data types supported: QASYMM8.
+ */
+ void configure(ICLTensor *input, ICLTensor *output);
+};
+
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLLOGICALNOT_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
new file mode 100644
index 000000000..198a0fd4e
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLNEG_H__
+#define __ARM_COMPUTE_CLNEG_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLNeg : public ICLSimpleFunction
+{
+public:
+ /** Initialise the source and destination tensors of the CLNeg function.
+ *
+ * @param[in] input Source tensor. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(ICLTensor *input, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLNEG_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
new file mode 100644
index 000000000..622a61b5e
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLPRELU_H__
+#define __ARM_COMPUTE_CLPRELU_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLPReLU : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's sources and destination.
+ *
+ * @param[in] input Source tensor. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[in] alpha Alpha tensor. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ */
+ void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLPRELU_H__*/
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
new file mode 100644
index 000000000..b142d3a2e
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLPixelWiseDivision.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLPixelWiseDivision class
+ */
+#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
+#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to run @ref CLPixelWiseDivisionKernel.
+ */
+class CLPixelWiseDivision : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Initialise the kernel's inputs, output and conversion policy.
+ * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32
+ * The input tensor is [in, out] because its TensorInfo might be
+ * modified inside the kernel in case of broadcasting of dimension 0.
+ * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
+ * The input tensor is [in, out] because its TensorInfo might be
+ * modified inside the kernel in case of broadcasting of dimension 0.
+ * @param[out] output The output tensor. Data types supported: same as @p input1.
+ * Note: U8 requires both inputs to be U8.
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or
+ * 1/2^n where n is between 0 and 15.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
+ * even.
+ * @return N/A
+ */
+ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f,
+ ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLPixelWiseDivision
+ * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
+ * @param[in] output The output tensor info. Data types supported: same as @p input1.
+ * Note: U8 requires both inputs to be U8.
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n
+ * where n is between 0 and 15.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output, float scale = 1.f,
+ ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
+};
+}
+#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
new file mode 100644
index 000000000..7e88cb369
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLRNN_LAYER_EX_H__
+#define __ARM_COMPUTE_CLRNN_LAYER_EX_H__
+
+#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
+#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** RNN layer function composed of the OpenCL functions and kernels held as its private members */
+class CLRNNLayerEx : public IFunction
+{
+public:
+ /** Constructor; the memory manager argument is optional and defaults to nullptr */
+ CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Initialize the function
+ *
+ * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
+ * types supported: F16/F32
+ * @param[in] weights Weights tensor of shape [input_size, num_units] that
+ * multiplies the input. Data types supported: Same as @p input
+ * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies
+ * the current 'state'. Data types supported: Same as @p input
+ * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same
+ * as @p input
+ * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types
+ * supported: Same as @p input
+ * @param[out] output Output tensor of shape [num_units, batch_size]. Data types
+ * supported: Same as @p input
+ * @param[in] info Activation layer parameter.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state,
+ ICLTensor *output, ActivationLayerInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLRNNLayerEx
+ *
+ * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
+ * types supported: F16/F32
+ * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies
+ * the input. Data types supported: Same as @p input
+ * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the
+ * current 'state'. Data types supported: Same as @p input
+ * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p
+ * input
+ * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types
+ * supported: Same as @p input
+ * @param[in] output Output tensor of shape [num_units, batch_size]. Data types
+ * supported: Same as @p input
+ * @param[in] info Activation layer parameter.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
+ const ITensorInfo *hidden_state, const ITensorInfo *output,
+ const ActivationLayerInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ CLMemoryGroup _memory_group;
+ CLGEMM _gemm_state_f;
+ CLSaturatedArithmeticOperationKernel _add_kernel;
+ CLActivationLayerKernel _activation_kernel;
+ CLFullyConnectedLayer _fully_connected_kernel;
+ CLCopyKernel _copy_kernel;
+ CLTensor _fully_connected_out;
+ CLTensor _gemm_output;
+ CLTensor _add_output;
+ bool _is_prepared;
+};
+}
+#endif /* __ARM_COMPUTE_CLRNN_LAYER_EX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
new file mode 100644
index 000000000..e1a6f6ab4
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLReduceOperation.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLReduceOperation class
+ */
+
+#ifndef __ARM_COMPUTE_CLREDUCEOPERATION_H__
+#define __ARM_COMPUTE_CLREDUCEOPERATION_H__
+
+#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
+#include "arm_compute/core/TypesEx.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform a reduce operation over a set of axes of a tensor
+ */
+class CLReduceOperation : public IFunction
+{
+public:
+ /**
+ * @brief Construct a new CLReduceOperation object
+ */
+ CLReduceOperation();
+
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] input Source tensor. Data types supported: U8/S32/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] axis Axes along which to reduce. They must be sorted and contain no duplicates.
+ * @param[in] op Reduce operation to perform.
+ * @return N/A
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis,
+ ReduceOperation op);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLReduceOperation.
+ * @param[in] input Source tensor info. Data types supported: U8/S32/F32
+ * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] axis Axes along which to reduce. They must be sorted and contain no duplicates.
+ * @param[in] op Reduce operation to perform.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const std::set<uint32_t> &axis, const ReduceOperation &op);
+
+ /**
+ * @brief Run the OpenCL kernel for this operation
+ * @return N/A
+ */
+ void run() override;
+
+private:
+ ICLTensor *_input;
+ ICLTensor *_output;
+ std::set<uint32_t> _axis;
+
+ std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
+ std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr};
+};
+}
+#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
new file mode 100644
index 000000000..7e2df8986
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__
+#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLSpaceToBatchNDKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/S32/F32.
+ * @note The function divides "spatial" dimensions of the input into a grid of blocks of shape
+ * block_size, and interleaves these blocks with the "batch" dimension.
+ */
+class CLSpaceToBatchND : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @note The data layout of input and output must be the same.
+ * @note The number of dimensions of input and output must be 4, and `spatial` dimensions
+ * are height and width.
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
+ * Data layout supported: NCHW/NHWC
+ * @param[in] block_size Tensor of integer values specifying block sizes for spatial
+ * dimension.
+ * Data types supported: S32
+ * @param[in] padding_size Tensor of integer values specifying padding sizes for spatial
+ * dimension.
+ * Data types supported: S32
+ * @param[out] output Output tensor. Data types supported: same as @p input.
+ * Data layout supported: NCHW/NHWC
+ */
+ void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size,
+ ICLTensor *output);
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
new file mode 100644
index 000000000..17f762092
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__
+#define __ARM_COMPUTE_CLSPACETODEPTH_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLSpaceToDepthKernel
+ *
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
+ * @note The function converts the input tensor to the tensor of the output tensor's type.
+ */
+class CLSpaceToDepth : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
+ * @param[in] block_size Block size, integer only.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
+};
+
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
new file mode 100644
index 000000000..6b26a85c8
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLStridedSliceEx.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLStridedSliceEx class
+ */
+
+#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
+#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to run @ref CLStridedSliceKernel
+ */
+class CLStridedSliceEx : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Set the input/output tensors, the begin/end/strides tensors and the three masks
+ * @param[in] input Tensor input. Data type supported:
+ * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] beginData 'begin' vector of strided slice operation
+ * @param[in] endData 'end' vector of strided slice operation
+ * @param[in] stridesData 'strides' vector of strided slice operation
+ * @param[in] beginMask If the ith bit is set, begin[i] is ignored
+ * @param[in] endMask If the ith bit is set, end[i] is ignored
+ * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the
+ * dimensionality by 1, taking on the value at index begin[i]
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
+ ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
+ int32_t shrinkAxisMask);
+};
+}
+#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
new file mode 100644
index 000000000..5327e016f
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file CLTopKV2.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLTopKV2 class
+ */
+#ifndef __ARM_COMPUTE_CLTOPK_V2_H__
+#define __ARM_COMPUTE_CLTOPK_V2_H__
+
+#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h"
+
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to execute TopKV2 operation.
+ */
+class CLTopKV2 : public IFunction
+{
+public:
+ /**
+ * @brief Construct a new CLTopKV2 object
+ */
+ CLTopKV2();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLTopKV2(const CLTopKV2 &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLTopKV2 &operator=(const CLTopKV2 &) = delete;
+
+ /**
+ * @brief Construct a new CLTopKV2 object by using move constructor
+ * @param[in] CLTopKV2 object to move
+ */
+ CLTopKV2(CLTopKV2 &&) = default;
+
+ /**
+ * @brief Move-assign a CLTopKV2 object.
+ * @param[in] CLTopKV2 object to assign. This object will be moved.
+ */
+ CLTopKV2 &operator=(CLTopKV2 &&) = default;
+
+ /**
+ * @brief Initialise the kernel's inputs and outputs.
+ * @param[in] input Input image. Data types supported: U8/S16/F32.
+ * @param[in] k The value of `k`.
+ * @param[out] values Top k values. Data types supported: S32 if input type is U8/S16, F32 if
+ * input type is F32.
+ * @param[out] indices Indices related to top k values. Data types supported: S32 if input type
+ * is U8/S16, F32 if input type is F32.
+ * @return N/A
+ */
+ void configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices,
+ int total_bits = 32, int bits = 4);
+
+ /**
+ * @brief Run the kernels contained in the function
+ * Depending on the value of the following environment variables it works differently:
+ * - If the value of environment variable "ACL_TOPKV2" == "GPU_SINGLE",
+ * quick sort on GPU is used.
+ * - If the value of environment variable "ACL_TOPKV2" == "GPU",
+ * radix sort on GPU is used.
+ * - For any other value, TopKV2 runs on CPU
+ * @return N/A
+ */
+ void run() override;
+
+private:
+ void run_on_cpu();
+ void run_on_gpu();
+ void run_on_gpu_single_quicksort();
+
+ uint32_t _k;
+ uint32_t _total_bits;
+ uint32_t _bits;
+ uint32_t _radix;
+ uint32_t _hist_buf_size;
+ uint32_t _glob_sum_buf_size;
+ uint32_t _n;
+
+ ICLTensor *_input;
+ ICLTensor *_values;
+ ICLTensor *_indices;
+
+ cl::Buffer _qs_idx_buf;
+ cl::Buffer _qs_temp_buf;
+ cl::Buffer _hist_buf;
+ cl::Buffer _glob_sum_buf;
+ cl::Buffer _temp_buf;
+ cl::Buffer _first_negative_idx_buf;
+ cl::Buffer _in_key_buf;
+ cl::Buffer _out_key_buf;
+ cl::Buffer _in_ind_buf;
+ cl::Buffer _out_ind_buf;
+
+ cl::Buffer *_p_in_key_buf;
+ cl::Buffer *_p_out_key_buf;
+ cl::Buffer *_p_in_ind_buf;
+ cl::Buffer *_p_out_ind_buf;
+
+ CLTopKV2Single _qs_kernel;
+ CLTopKV2Init _init_kernel;
+ CLRadixSortHistogram _hist_kernel;
+ CLRadixSortScanHistogram _scan_hist_kernel;
+ CLRadixSortGlobalScanHistogram _glob_scan_hist_kernel;
+ CLRadixSortPasteHistogram _paste_hist_kernel;
+ CLRadixSortReorder _reorder_kernel;
+ CLTopKV2FindFirstNegative _find_first_negative_kernel;
+ CLTopKV2ReorderNegatives _reorder_negatives_kernel;
+ CLTopKV2Store _store_kernel;
+};
+}
+#endif // __ARM_COMPUTE_CLTOPK_V2_H__
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
new file mode 100644
index 000000000..340a7bfe9
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
+
+#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
+
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Function to run the transpose convolution layer.
+ *
+ * @note This layer was copied in order to fix a bug that computed wrong output dimensions.
+ *
+ * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the
+ * input, pad is the amount of padding and finally a is a user
+ * specified value where a < stride - 1, that increases the padding top and right of the input
+ * image.
+ *
+ * The relation between input to output is as follows:
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - padding\_y + kernel\_y
+ * \f]
+ *
+ * where:
+ * width_input is the size of the first input dimension.
+ * height_input is the size of the second input dimension.
+ * width_output is the size of the first output dimension.
+ * height_output is the size of the second output dimension.
+ * kernel_x and kernel_y are the convolution sizes in x and y.
+ * stride_x and stride_y are the input strides of the first and second dimension.
+ *
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using the @ref
+ * CPPFlipWeightsKernel.
+ *
+ * This function calls the following OpenCL kernels/functions:
+ *
+ * -# @ref CLTransposeConvLayerUpsample
+ * -# @ref CLConvolutionLayer
+ *
+ */
+class CLTransposeConvLayer : public IFunction
+{
+public:
+ /** Constructor */
+ CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayer(const CLTransposeConvLayer &) = delete;
+ /** Default move constructor */
+ CLTransposeConvLayer(CLTransposeConvLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete;
+ /** Default move assignment operator */
+ CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default;
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported:
+ * Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] info Contains padding and policies to be used in the
+ * transpose convolution, this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been
+ * reshaped with @ref CLWeightsReshapeKernel.
+ */
+ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported:
+ * Same as @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] info Contains padding and policies to be used in the
+ * transpose convolution, this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ CLMemoryGroup _memory_group;
+ CLTransposeConvLayerUpsample _scale_f;
+ CLConvolutionLayer _conv_f;
+ CPPFlipWeightsKernel _flip_weights;
+ CLTensor _scaled_output;
+ ICLTensor *_original_weights;
+ CLTensor _weights_flipped;
+ bool _is_prepared;
+};
+}
+#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
new file mode 100644
index 000000000..4ae0e1830
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */
+class CLTransposeConvLayerUpsample : public IFunction
+{
+public:
+ /** Default constructor */
+ CLTransposeConvLayerUpsample();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete;
+ /** Allow instances of this class to be moved */
+ CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default;
+ /** Allow instances of this class to be moved */
+ CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default;
+ /** Default destructor */
+ virtual ~CLTransposeConvLayerUpsample() = default;
+
+ /** Initialize the function's source, destination, inner border and pad/stride info.
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] inner_border The number of zeros added to right and top edges of the input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+ const PadStrideInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLTransposeConvLayerUpsample
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] inner_border The number of zeros added to right and top edges of the input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const BorderSize &inner_border, const PadStrideInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLTransposeConvLayerUpsampleKernel _upsample;
+ ICLTensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */