summaryrefslogtreecommitdiff
path: root/compute/ARMComputeEx/arm_compute/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'compute/ARMComputeEx/arm_compute/runtime')
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h11
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h129
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h69
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h75
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h68
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h201
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h142
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h62
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h64
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h103
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h120
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h68
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h81
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h176
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h102
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h65
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h7
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h79
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h78
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h70
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h170
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h63
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h130
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h99
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h136
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h79
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h68
29 files changed, 313 insertions, 2210 deletions
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
index 97bc4cea5..cfbd13436 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
@@ -16,25 +16,14 @@
#ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__
#define __ARM_COMPUTE_CLFUNCTIONSEX_H__
-#include <arm_compute/runtime/CL/functions/CLArgOperation.h>
-#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h>
#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
-#include <arm_compute/runtime/CL/functions/CLCast.h>
-#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h>
#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/CL/functions/CLGatherEx.h>
#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h>
-#include <arm_compute/runtime/CL/functions/CLLogicalNot.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
-#include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h>
-#include <arm_compute/runtime/CL/functions/CLPReLU.h>
#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
-#include <arm_compute/runtime/CL/functions/CLRNNLayerEx.h>
-#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h>
-#include <arm_compute/runtime/CL/functions/CLSplit.h>
-#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h>
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h
deleted file mode 100644
index c37096f7c..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgOperation.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLArgOperation.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLArgOperation class
- */
-
-#ifndef __ARM_COMPUTE_CLARGOPERATION_H__
-#define __ARM_COMPUTE_CLARGOPERATION_H__
-
-#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to execute CLArgOperation operation
- */
-class CLArgOperation : public IFunction
-{
-public:
- /**
- * @brief Construct a new CLArgOperation object
- */
- CLArgOperation();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLArgOperation(const CLArgOperation &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLArgOperation &operator=(const CLArgOperation &) = delete;
-
- /**
- * @brief Construct a new CLArgOperation object by using copy constructor
- * @param[in] CLArgOperation object to move
- */
- CLArgOperation(CLArgOperation &&) = default;
-
- /**
- * @brief Assign a CLArgOperation object.
- * @param[in] CLArgOperation object to assign. This object will be moved.
- */
- CLArgOperation &operator=(CLArgOperation &&) = default;
-
- /**
- * @brief Initialise the kernel's inputs and outputs.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[out] output The result of arg operation. Data types supported: S32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[in] op Arg operation to perform.
- * @return N/A
- */
- void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, ArgOperation op);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[out] output The result of arg operation. Data types supported: S32.
- * @param[in] op Arg operation to perform.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &axis,
- const ITensorInfo *output, ArgOperation op);
- /**
- * @brief Run the OpenCL kernel for this operation
- * @return N/A
- */
- void run() override;
-
-private:
- ICLTensor *_input{nullptr};
- ICLTensor *_output{nullptr};
- std::vector<uint32_t> _axis{};
- ArgOperation _arg_op{ArgOperation::MAX};
-
- std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
- std::unique_ptr<CLArgOperationKernel[]> _argop_kernels{nullptr};
- size_t _num_of_kernels{0};
-};
-}
-#endif /*__ARM_COMPUTE_CLARGOPERATION_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
deleted file mode 100644
index eed5cb8a4..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
-#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLBatchToSpaceNDKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLBatchToSpaceND : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] block_size A pointer to an array of integer values specifying block sizes
- * for spatial dimension.
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
deleted file mode 100644
index ebe0d8a1c..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLCast.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLCast class
- */
-
-#ifndef __ARM_COMPUTE_CLCAST_H__
-#define __ARM_COMPUTE_CLCAST_H__
-
-#include "arm_compute/core/TypesEx.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLCastKernel.
- * This converts the input tensor to the tensor of the output tensor's type.
- */
-class CLCast : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's input and output
- * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] input_subtype Sub data type of input.
- */
- void configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype);
-};
-}
-#endif /* __ARM_COMPUTE_CLCAST_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
deleted file mode 100644
index d52a538df..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__
-#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLDepthToSpaceKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLDepthToSpace : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[block_size] block size integer only
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
-};
-} // namesace arm_compute
-
-#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
new file mode 100644
index 000000000..409eaf593
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLReverse.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Function to run the deconvolution layer.
+ *
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the
+ * input and pad is the amount of padding.
+ *
+ * The relation between input to output is as follows:
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
+ *
+ * where:
+ * width_input is the size of the first input dimension.
+ * height_input is the size of the second input dimension.
+ * width_output is the size of the first output dimension.
+ * height_output is the size of the second output dimension.
+ * kernel_x and kernel_y are the convolution sizes in x and y.
+ * stride_x and stride_y is the input stride of the first and second dimension.
+ *
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse.
+ *
+ * This function calls the following OpenCL kernels/functions:
+ *
+ * -# @ref CLDeconvolutionLayerUpsample
+ * -# @ref CLConvolutionLayer
+ *
+ * And the following CPP kernels:
+ * -# @ref CLReverse
+ *
+ */
+class CLDirectTransposeConvLayer : public IFunction
+{
+public:
+ /** Constructor */
+ CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete;
+ /** Default move constructor */
+ CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete;
+ /** Default move assignment operator */
+ CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for
+ * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this
+ * is decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ */
+ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
+ * an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for
+ * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLDirectTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for input
+ * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is
+ * decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ MemoryGroup _memory_group;
+ CLDeconvolutionLayerUpsample _scale_f;
+ CLConvolutionLayer _conv_f;
+ CLReverse _flip_weights;
+
+ CLTensor _scaled_output;
+ ICLTensor *_original_weights;
+ CLTensor _weights_flipped;
+ CLTensor _flip_axis;
+
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
index 1a0284a3e..f3266f688 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
@@ -50,7 +50,7 @@
#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
namespace arm_compute
{
@@ -168,7 +168,7 @@ private:
CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel;
CLScaleFactorSymm8Kernel _scale_factor_kernel;
CLQuantizationSymmetricKernel _quant_input_kernel;
- CLGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
CLMultiplyScaleFactorKernel _multiply_scale_kernel;
CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to
// add bias in
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h
deleted file mode 100644
index 68aba74ab..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-namespace arm_compute
-{
-class IMemoryManager;
-class ICLTensor;
-
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the
- * following OpenCL kernels:
- *
- * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of
- * GEMMInfo is FALSE)
- * -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0)
- * -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0)
- *
-*/
-class CLGEMMLowpMatrixMultiplyCoreEx : public IFunction
-{
-public:
- /** Constructor */
- CLGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyCoreEx(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move constructor */
- CLGEMMLowpMatrixMultiplyCoreEx(CLGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyCoreEx &operator=(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move assignment operator */
- CLGEMMLowpMatrixMultiplyCoreEx &operator=(CLGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Initialise the kernel's inputs, output
- *
- * @note GEMMLowp: low precision GEMM kernel. [A * B + C]
- * This kernel performs the following computations:
- *
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
- * -# Compute the matrix product of the resulting a * b in int32.
- * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
- *
- * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported:
- * S32
- * @param[out] output Output tensor. Data type supported: S32 or QASYMM8 if
- * gemm_info.gemmlowp_output_stage != NONE
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- */
- void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output,
- const GEMMInfo &gemm_info = GEMMInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLGEMMLowpMatrixMultiplyCoreEx
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type
- * supported: S32
- * @param[in] output Output tensor info. Data type supported: S32 or QASYMM8 if
- * gemm_info.gemmlowp_output_stage != NONE
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
-
- // Kernels used
- CLGEMMLowpMatrixMultiplyKernelEx _mm_midgard_kernel;
- CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
-
- // Temporary tensors
- CLTensor _vector_sum_col;
- CLTensor _vector_sum_row;
-
- int32_t _a_offset;
- int32_t _b_offset;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h
deleted file mode 100644
index 51216715f..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLLogicalNot.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLLOGICALNOT_H__
-#define __ARM_COMPUTE_CLLOGICALNOT_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLLogicalNot : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8.
- * @param[out] output Output tensor. Data types supported: QASYMM8.
- */
- void configure(ICLTensor *input, ICLTensor *output);
-};
-
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLLOGICALNOT_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
deleted file mode 100644
index 7fbe558ff..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLPRELU_H__
-#define __ARM_COMPUTE_CLPRELU_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLPReLU : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input. Data types supported:
- * QASYMM8/F16/F32.
- * @param[in] alpha. Data types supported:
- * QASYMM8/F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPRELU_H__*/
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
deleted file mode 100644
index e83fb01cd..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLPixelWiseDivision.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLPixelWiseDivision class
- */
-#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLPixelWiseDivisionKernel.
- */
-class CLPixelWiseDivision : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs, output and convertion policy.
- * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or
- * 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
- * even.
- * @return N/A
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f,
- ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
- RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLPixelWiseDivision
- * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale = 1.f,
- ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
- RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
-};
-}
-#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
deleted file mode 100644
index b49cbd873..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLRNN_LAYER_EX_H__
-#define __ARM_COMPUTE_CLRNN_LAYER_EX_H__
-
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
-#include "arm_compute/runtime/CL/functions/CLGEMM.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLRNNLayerEx */
-class CLRNNLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that
- * multiplies the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies
- * the current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same
- * as @p input
- * @param[out] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- */
- void configure(const ICLTensor *input, const ICLTensor *weights,
- const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state,
- ICLTensor *output, ActivationLayerInfo &info);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies
- * the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the
- * current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p
- * input
- * @param[in] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- CLGEMM _gemm_state_f;
- CLSaturatedArithmeticOperationKernel _add_kernel;
- CLActivationLayerKernel _activation_kernel;
- CLFullyConnectedLayer _fully_connected_kernel;
- CLCopyKernel _copy_kernel;
- CLTensor _fully_connected_out;
- CLTensor _gemm_output;
- CLTensor _add_output;
- bool _is_prepared;
-};
-}
-#endif /* __ARM_COMPUTE_CLRNN_LAYER_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
deleted file mode 100644
index 2090b46fa..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__
-#define __ARM_COMPUTE_CLSPACETODEPTH_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLSpaceToDepthKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLSpaceToDepth : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[block_size] block size integer only
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
deleted file mode 100644
index 03edd15e6..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLStridedSlice.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLStridedSlice and arm_compute::CLStridedSliceCPU class
- */
-
-#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
-#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLStridedSliceKernel
- */
-class CLStridedSliceEx : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs and outputs
- * @param[in] input Tensor input. Data type supported:
- * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] beginData 'begin' vector of strided slice operation
- * @param[in] endData 'end' vector of strided slice operation
- * @param[in] stridesData 'strides' vector of strided slice operation
- * @param[in] beginMask If the ith bit is set, begin[i] is ignored
- * @param[in] endMask If the ith bit is set, end[i] is ignored
- * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the
- * dimensionality by 1, taking on the value at index begin[i]
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-};
-}
-#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
index 54a697e69..5fb102e47 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
@@ -15,7 +15,7 @@
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,16 +37,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
-#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
-
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-
-#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -54,119 +49,102 @@
namespace arm_compute
{
-class ICLTensor;
-/** Function to run the transpose convolution layer.
- *
- * @note This layer was copied in order to fix a bug computing to wrong output dimensions.
- *
- * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input
- * depending on the stride and pad info and then perform a 1x1
- * convolution pass. Input stride defines how many zeroes we should put between each element of the
- * input, pad is the amount of padding and finally a is a user
- * specified value where a < stride - 1, that increases the padding top and right of the input
- * image.
- *
- * The relation between input to output is as follows:
- * \f[
- * width\_output = (width\_input - 1) \cdot stride\_x - \cdot padding\_x + kernel\_x
- * \f]
- * \f[
- * height\_output = (height\_input - 1) \cdot stride\_y - \cdot padding\_y + kernel\_y
- * \f]
- *
- * where:
- * width_input is the size of the first input dimension.
- * height_input is the size of the second input dimension.
- * width_output is the size of the first output dimension.
- * height_output is the size of the second output dimension.
- * kernel_x and kernel_y are the convolution sizes in x and y.
- * stride_x and stride_y is the input stride of the first and second dimension.
- *
- * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
- * Therefore, it will be necessary to use the weights in the
- * reverse order to perform an actual convolution. This is achieved by using the @ref
- * CPPFlipWeightsKernel.
- *
- * This function calls the following OpenCL kernels/functions:
- *
- * -# @ref CLTransposeConvLayerUpsample
- * -# @ref CLConvolutionLayer
+/** Basic function to compute the deconvolution layer. This function calls the following OpenCL
+ * kernels/functions:
*
+ * -# @ref CLGEMMDeconvolutionLayer
+ * -# @ref CLDirectTransposeConvLayer
*/
class CLTransposeConvLayer : public IFunction
{
public:
- /** Constructor */
+ /** Default constructor */
CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayer(const CLTransposeConvLayer &) = delete;
- /** Default move constructor */
- CLTransposeConvLayer(CLTransposeConvLayer &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete;
- /** Default move assignment operator */
- CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default;
+
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
- * and an optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
- * Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions
- * as the @p input.
- * @param[in] info Contains padding and policies to be used in the
- * transpose convolution, this is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been
- * reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same
+ * as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this
+ * is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
- const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo());
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
+ * an optional 4th dimension for batch of inputs. Data types supported:
+ * QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported:
+ * Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayer
+ * CLTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as
+ * @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
- * and an optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
- * Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[in] output Output tensor info. The output has the same number of dimensions
- * as the @p input.
- * @param[in] info Contains padding and policies to be used in the
- * transpose convolution, this is decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
- unsigned int innvalid_right, unsigned int invalid_bottom,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
+ static DeconvolutionMethod
+ get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLTransposeConvLayerUpsample _scale_f;
- CLConvolutionLayer _conv_f;
- CPPFlipWeightsKernel _flip_weights;
- CLTensor _scaled_output;
- ICLTensor *_original_weights;
- CLTensor _weights_flipped;
- bool _is_prepared;
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ std::unique_ptr<IFunction> _function;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
deleted file mode 100644
index 7570fe76d..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
-#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */
-class CLTransposeConvLayerUpsample : public IFunction
-{
-public:
- /** Default constructor */
- CLTransposeConvLayerUpsample();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete;
- /** Allow instances of this class to be moved */
- CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default;
- /** Allow instances of this class to be moved */
- CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default;
- /** Default destructor */
- virtual ~CLTransposeConvLayerUpsample() = default;
-
- /** Initialize the function's source, destination, interpolation type and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] inner_border The number of zeros added to right and top edges of the input.
- * @param[in] info Contains padding and policies to be used in the deconvolution.
- */
- void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
- const PadStrideInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayerUpsample
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] inner_border The number of zeros added to right and top edges of the input.
- * @param[in] info Contains padding and policies to be used in the deconvolution.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const BorderSize &inner_border, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLTransposeConvLayerUpsampleKernel _upsample;
- ICLTensor *_output;
-};
-}
-#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h b/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
deleted file mode 100644
index 666afef4b..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
-#define __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
-
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref CPPUpsample */
-class CPPUpsampleEx : public ICPPSimpleFunction
-{
-public:
- /** Configure the upsample CPP kernel
- *
- * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] info Padding information
- */
- void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
-};
-}
-#endif /* __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 49504fde3..3fad230f1 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -18,20 +18,13 @@
#include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
-#include <arm_compute/runtime/NEON/functions/NECast.h>
-#include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
#include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h>
#include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEPReLU.h>
-#include <arm_compute/runtime/NEON/functions/NEReduceMeanEx.h>
#include <arm_compute/runtime/NEON/functions/NEReduceSum.h>
-#include <arm_compute/runtime/NEON/functions/NERNNLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEReduceOperation.h>
-#include <arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h>
#endif // __ARM_COMPUTE_NEFUNCTIONSEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h
deleted file mode 100644
index f0f0d8114..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECast.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NECAST_H__
-#define __ARM_COMPUTE_NECAST_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NECastKernel that converts an input tensor to the other types */
-class NECast : public INESimpleFunctionNoBorder
-{
-public:
- /** Configure the kernel.
- *
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[out] output Destination tensor with the same dimensions of input. Data type supported:
- * U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- */
- void configure(const ITensor *input, ITensor *output,
- SubDataType input_subtype = SubDataType::NONE);
- /** Static function to check if given info will lead to a valid configuration of @ref NECast
- *
- * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] output Output tensor info. Data type supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype = SubDataType::NONE);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NECAST_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h
deleted file mode 100644
index 005d85add..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__
-#define __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEDepthToSpaceLayerKernelEx. */
-class NEDepthToSpaceLayerEx : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEDepthToSpaceLayerEx.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape x value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h
deleted file mode 100644
index 27a38e982..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
-#define __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform negative on an input tensor. */
-class NENegLayer : public INESimpleFunction
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32/S32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
index 39c57eb70..56548a479 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
@@ -46,7 +46,7 @@
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
@@ -164,7 +164,7 @@ private:
MemoryGroup _memory_group;
NEFullyConnectedHybridLayerReshapeWeights _reshape_weights_function;
NEQuantizationSymmetricKernel _quant_input_kernel;
- NEGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp;
+ NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
NEMultiplyScaleFactorKernel _multiply_scale_kernel;
NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
Tensor _reshape_weights_output;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h
deleted file mode 100644
index d844513c9..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-// #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following
- * NEON kernels if the DOT product instruction is not available:
- *
- * -# @ref NEGEMMInterleave4x4Kernel
- * -# @ref NEGEMMTranspose1xWKernel
- * -# @ref NEGEMMLowpMatrixMultiplyKernel
- * -# @ref NEGEMMLowpOffsetContributionKernel
- * -# @ref NEActivationLayer
- *
- * otherwise if the DOT product instruction is available:
- *
- * -# @ref NEGEMMLowpOffsetContributionKernel
- *
-*/
-class NEGEMMLowpMatrixMultiplyCoreEx : public IFunction
-{
-public:
- /** Constructor */
- NEGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpMatrixMultiplyCoreEx(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move constructor */
- NEGEMMLowpMatrixMultiplyCoreEx(NEGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpMatrixMultiplyCoreEx &operator=(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move assignment operator */
- NEGEMMLowpMatrixMultiplyCoreEx &operator=(NEGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Initialise the kernel's inputs, output
- *
- * @note GEMM_LOWP: low precision GEMM kernel
- * This kernel performs the following computations:
- *
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
- * -# Compute the matrix product of the resulting a * b in int32.
- *
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is
- * QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor (Matrix A). Data type supported:
- * QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported:
- * S32
- * @param[out] output Output tensor. Data type supported: Data type supported:
- * S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- */
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output,
- const GEMMInfo &gemm_info = GEMMInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEGEMMLowpMatrixMultiplyCoreEx
- *
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is
- * QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported:
- * QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type
- * supported: S32
- * @param[in] output Output tensor info. Data type supported: Data type supported:
- * S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
-
- // Inherited methods overridden
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- NEGEMMAssemblyDispatch _asm_glue;
- std::unique_ptr<INEKernel> _mm_kernel;
- std::unique_ptr<INEKernel> _mtx_a_reshape_kernel;
- std::unique_ptr<INEKernel> _mtx_b_reshape_kernel;
- NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
-
- Tensor _vector_sum_col;
- Tensor _vector_sum_row;
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _mm_result_s32;
- Tensor _signed_a;
- Tensor _signed_output;
- const ITensor *_original_b;
- int32_t _a_offset;
- int32_t _b_offset;
-
- bool _run_vector_matrix_multiplication;
- bool _assembly_path;
- bool _fused_assembly_path;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _fuse_output_stage;
- bool _flip_signedness;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h
deleted file mode 100644
index ca8413352..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEPRELU_H__
-#define __ARM_COMPUTE_NEPRELU_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEPReLUKernel */
-class NEPReLU : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input. Data types supported: QASYMM8/F32.
- * @param[in] alpha. Data types supported: Same as @p input.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(const ITensor *input, const ITensor *alpha, ITensor *output);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEPRELU_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h
deleted file mode 100644
index 8a7b17946..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NERNNLAYER_EX_H__
-#define __ARM_COMPUTE_NERNNLAYER_EX_H__
-
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NERNNLayerEx */
-class NERNNLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERNNLayerEx(const NERNNLayerEx &) = delete;
- /** Default move constructor */
- NERNNLayerEx(NERNNLayerEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERNNLayerEx &operator=(const NERNNLayerEx &) = delete;
- /** Default move assignment operator */
- NERNNLayerEx &operator=(NERNNLayerEx &&) = default;
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that
- * multiplies the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies
- * the current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same
- * as @p input
- * @param[out] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights,
- const ITensor *bias, ITensor *hidden_state, ITensor *output,
- ActivationLayerInfo &info);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies
- * the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the
- * current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p
- * input
- * @param[in] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_state_f;
- NEArithmeticAdditionKernel _add_kernel;
- NEActivationLayerKernel _activation_kernel;
- NEFullyConnectedLayer _fully_connected_kernel;
- NECopyKernel _copy_kernel;
- Tensor _fully_connected_out;
- Tensor _gemm_output;
- Tensor _add_output;
- bool _is_prepared;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NERNNLAYER_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h
deleted file mode 100644
index 03ac45798..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
-#define __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform reduce operation */
-class NEReduceMeanEx : public IFunction
-{
-public:
- /** Constructor */
- NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] reduction_axis Reduction axis vector.
- * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- */
- void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEReduceMeanEx
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] reduction_axis Reduction axis vector.
- * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
- * @param[in] output Destination tensor. Data type supported: Same as @p input
- *
- * @return A status
- */
- static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- std::unique_ptr<NEReductionOperation[]> _reduction_kernels{nullptr};
- std::unique_ptr<Tensor[]> _reduced_outs{nullptr};
- NEReshapeLayer _reshape;
- unsigned int _reduction_ops;
- bool _keep_dims;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h
deleted file mode 100644
index 3b695fbc0..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__
-#define __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to spatial divide a tensor. This function calls the following NEON
- * kernels/functions:
- *
- * -# @ref NEMemsetKernel
- * -# @ref NESpaceToBatchLayerKernel
- */
-class NESpaceToBatchLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- NESpaceToBatchLayerEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerEx(const NESpaceToBatchLayerEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerEx &operator=(const NESpaceToBatchLayerEx &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerEx(NESpaceToBatchLayerEx &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerEx &operator=(NESpaceToBatchLayerEx &&) = default;
- /** Default destructor */
- virtual ~NESpaceToBatchLayerEx() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings,
- ITensor *output);
- /** Set the input and output tensors. (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const int block_shape_x, const int block_shape_y,
- const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToBatchLayerEx
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32
- * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32
- * @param[in] output Tensor output info. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape,
- const ITensorInfo *paddings, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToBatchLayerEx (Static block shape and paddings)
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y,
- const Size2D &padding_left, const Size2D &padding_right,
- const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- NEMemsetKernel _memset_kernel; /**< Memset kernel to run */
- bool _has_padding; /**< Flag to check if the output has padding */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h
deleted file mode 100644
index 9f32616f3..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__
-#define __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** This function calls the following NEON kernels/functions:
- *
- * -# @ref NESpaceToDepthLayerKernelEx
- */
-class NESpaceToDepthLayerEx : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToDepthLayerEx (Static block shape and paddings)
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
index 408d150d0..24ff5dac9 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -15,7 +15,7 @@
*/
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,16 +37,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
-#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
+#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -59,8 +57,8 @@ namespace arm_compute
{
/** Function to run the deconvolution layer.
*
- * Transpose convolution Layer is the backward pass of Convolution Layer. First we transform the
- * input depending on the stride and pad info and then perfrom a 1x1
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perfrom a 1x1
* convolution pass. Input stride defines how many zeroes we should put between each element of the
* input, pad is the amount of padding and finaly a is a user
* specified value where a < stride - 1 that increases the padding top and right of the input image.
@@ -81,21 +79,22 @@ namespace arm_compute
* kernel_x and kernel_y are the convolution sizes in x and y.
* stride_x and stride_y is the input stride of the first and second dimension.
*
- * The weights used by Transpose convolution are supposed to be the same as the ones used for
- * Convolution. Therefore, it will be necessary to use the weights in the
- * reverse order to perform an actual convolution. This is achieved by using the @ref
- * CPPFlipWeightsKernel.
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
*
* This function calls the following NEON kernels/functions:
*
- * -# @ref CPPUpsample
+ * -# @ref CPPUpsample
* -# @ref NEConvolutionLayer
+ * -# @ref NEReverse
+ *
*
*/
class NETransposeConvLayer : public IFunction
{
public:
- /** Default constructor */
+ /** Constructor */
NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -112,37 +111,38 @@ public:
/** Set the input, weights, biases and output tensors.
*
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
+ * supported: Same as @p input.
* @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
- * supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16
+ * for F16 input.
* @param[out] output Output tensor. The output has the same number of dimensions as the @p
- * input.
+ * input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_bottom);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NETransposeConvLayer
+ * NETransposeConvLayer
*
* @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
+ * supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
- * supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
* @param[in] output Output tensor info. The output has the same number of dimensions as the @p
- * input.
+ * input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
* @return a status
*/
@@ -158,17 +158,11 @@ public:
private:
MemoryGroup _memory_group;
NEConvolutionLayer _conv_f;
- CPPUpsampleEx _upsample_f;
- CPPFlipWeightsKernel _flip_weights;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
+ CPPUpsample _upsample_f;
+ NEReverse _flip_weights;
Tensor _scaled_output;
Tensor _weights_flipped;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_nchw;
+ Tensor _flip_axis;
const ITensor *_original_weights;
ITensor *_input;
PadStrideInfo _info;