summaryrefslogtreecommitdiff
path: root/compute/ARMComputeEx/arm_compute/runtime
diff options
context:
space:
mode:
authorChunseok Lee <chunseok.lee@samsung.com>2020-12-14 14:43:04 +0900
committerChunseok Lee <chunseok.lee@samsung.com>2020-12-14 14:43:04 +0900
commit12d88feea8573f8490629cf62fc342b152e57d65 (patch)
tree3c734cc4d629834d2d523f4575ef84cd64684e57 /compute/ARMComputeEx/arm_compute/runtime
parentd6b371e095d737922187a518b8faba1ef6f3a2b1 (diff)
downloadnnfw-12d88feea8573f8490629cf62fc342b152e57d65.tar.gz
nnfw-12d88feea8573f8490629cf62fc342b152e57d65.tar.bz2
nnfw-12d88feea8573f8490629cf62fc342b152e57d65.zip
Imported Upstream version 1.11.0upstream/1.11.0
Diffstat (limited to 'compute/ARMComputeEx/arm_compute/runtime')
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h34
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h108
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h65
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h71
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h201
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h77
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h186
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h235
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h97
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h85
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h82
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h96
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h63
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h120
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h120
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h86
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h164
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h150
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h31
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h114
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h76
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h88
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h180
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h164
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h98
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h84
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h100
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h116
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h90
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h102
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h98
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h172
32 files changed, 3553 insertions, 0 deletions
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
new file mode 100644
index 000000000..484ebfd0b
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__
+#define __ARM_COMPUTE_CLFUNCTIONSEX_H__
+
+#include <arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h>
+#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
+#include <arm_compute/runtime/CL/functions/CLCastBool.h>
+#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
+#include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h>
+#include <arm_compute/runtime/CL/functions/CLGatherEx.h>
+#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
+#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h>
+#include <arm_compute/runtime/CL/functions/CLNeg.h>
+#include <arm_compute/runtime/CL/functions/CLOneHot.h>
+#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
+#include <arm_compute/runtime/CL/functions/CLSplitVEx.h>
+#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
+#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h>
+
+#endif // __ARM_COMPUTE_CLFUNCTIONSEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h
new file mode 100644
index 000000000..b1ee52bf9
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__
+#define __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__
+
+#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"
+#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+namespace arm_compute
+{
+class ITensorInfo;
+class ICLTensor;
+
+/** Function to calculate the index of the minimum or maximum values in a
+ * tensor based on an axis.
+ *
+ * @note The default data type for an uninitialized output tensor is
+ * signed 32-bit integer (S32). It is the user's responsibility to check
+ * that the results do not overflow because the indices are computed
+ * in unsigned 32-bit (U32).
+ */
+class CLArgMinMaxLayerEx : public IFunction
+{
+public:
+ /** Default Constructor.
+ *
+ * @param[in] memory_manager (Optional) Memory manager.
+ */
+ CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Input source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[out] output Output source tensor. Data types supported: U32/S32.
+ * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX,
+ * ARG_IDX_MIN
+ */
+ void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLArgMinMaxLayerEx
+ *
+ * @param[in] input Input source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[in] output Output source tensor info. Data types supported: U32/S32.
+ * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX,
+ * ARG_IDX_MIN
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output,
+ const ReductionOperation &op);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ CLTensor _not_reshaped_output;
+ std::vector<CLArgMinMaxLayerKernelEx> _reduction_kernels_vector;
+ CLReshapeLayerKernel _reshape_kernel;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
new file mode 100644
index 000000000..88a9b00ec
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLBINARYLOGICALOP_H__
+#define __ARM_COMPUTE_CLBINARYLOGICALOP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLBinaryLogicalOp : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's source and destination.
+ *
+ * @param[in] input1 Source tensor1. Data types supported: U8, QASYMM8.
+ * @param[in] input2 Source tensor2. Data types supported: U8 QASYMM8.
+ * @param[out] output Output tensor. Data types supported: U8, QASYMM8.
+ */
+ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+ BinaryLogicalOperation op);
+};
+
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLBINARYLOGICALOP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h
new file mode 100644
index 000000000..d6150684a
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file CLCastBool.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLCastBool class
+ */
+
+#ifndef ARM_COMPUTE_CLCASTBOOL_H
+#define ARM_COMPUTE_CLCASTBOOL_H
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to run @ref CLCastBoolKernel.
+ * This converts the boolean input tensor to the output tensor's type.
+ */
+class CLCastBool : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Initialise the kernel's input and output
+ * @param[in] input Input tensor. Data types supported: U8
+ * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/F16/F32.
+ */
+ void configure(ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /* ARM_COMPUTE_CLCASTBOOL_H */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
new file mode 100644
index 000000000..409eaf593
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLReverse.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Function to run the deconvolution layer.
+ *
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the
+ * input and pad is the amount of padding.
+ *
+ * The relation between input to output is as follows:
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
+ *
+ * where:
+ * width_input is the size of the first input dimension.
+ * height_input is the size of the second input dimension.
+ * width_output is the size of the first output dimension.
+ * height_output is the size of the second output dimension.
+ * kernel_x and kernel_y are the convolution sizes in x and y.
+ * stride_x and stride_y is the input stride of the first and second dimension.
+ *
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse.
+ *
+ * This function calls the following OpenCL kernels/functions:
+ *
+ * -# @ref CLDeconvolutionLayerUpsample
+ * -# @ref CLConvolutionLayer
+ *
+ * And the following CPP kernels:
+ * -# @ref CLReverse
+ *
+ */
+class CLDirectTransposeConvLayer : public IFunction
+{
+public:
+ /** Constructor */
+ CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete;
+ /** Default move constructor */
+ CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete;
+ /** Default move assignment operator */
+ CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for
+ * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this
+ * is decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ */
+ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
+ * an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for
+ * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLDirectTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for input
+ * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is
+ * decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ MemoryGroup _memory_group;
+ CLDeconvolutionLayerUpsample _scale_f;
+ CLConvolutionLayer _conv_f;
+ CLReverse _flip_weights;
+
+ CLTensor _scaled_output;
+ ICLTensor *_original_weights;
+ CLTensor _weights_flipped;
+ CLTensor _flip_axis;
+
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
new file mode 100644
index 000000000..fbee7e40e
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file CLEmbeddingLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLEmbeddingLookup class
+ */
+
+#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__
+#define __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform EmbeddingLookup operation
+ */
+class CLEmbeddingLookup : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of
+ * input.
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
+};
+}
+#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
new file mode 100644
index 000000000..f3266f688
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__
+#define __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
+#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
+#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
+#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+
+namespace arm_compute
+{
+/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls
+ * the following kernels:
+ *
+ * -# @ref CLTransposeKernel
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedHybridLayerReshapeWeights : public ICLSimpleFunction
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * S8.
+ * @param[out] output Destination tensor which stores the transposed input tensor. Data type
+ * supported: Same as @p input.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLFullyConnectedHybridLayerReshapeWeights
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * S8.
+ * @param[in] output Destination tensor which stores the transposed input tensor. Data type
+ * supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+
+/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following
+ * OpenCL kernels:
+ *
+ * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref CLFullyConnectedHybridLayerReshapeWeights (if @p are_weights_reshaped is set to false
+ * and transpose_weights is set to true ) (called once)
+ * -# @ref CLGEMMLowpMatrixMultiplyCore (if quantized symmetric)
+ * -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedHybridLayer : public IFunction
+{
+public:
+ /** Constructor */
+ CLFullyConnectedHybridLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedHybridLayer(const CLFullyConnectedHybridLayer &) = delete;
+ /** Default move constructor */
+ CLFullyConnectedHybridLayer(CLFullyConnectedHybridLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedHybridLayer &operator=(const CLFullyConnectedHybridLayer &) = delete;
+ /** Default move assignment operator */
+ CLFullyConnectedHybridLayer &operator=(CLFullyConnectedHybridLayer &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: S8.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
+ * multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
+ ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLFullyConnectedHybridLayer
+ *
+ * @param[in] input Source tensor info. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: S8.
+ * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor info. Its shape should be equal to the output of a
+ * matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+
+ // Inherited methods override
+ void run() override;
+ void prepare() override;
+
+private:
+ void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output,
+ bool retain_internal_weights);
+
+ MemoryGroup _memory_group;
+ CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel;
+ CLScaleFactorSymm8Kernel _scale_factor_kernel;
+ CLQuantizationSymmetricKernel _quant_input_kernel;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ CLMultiplyScaleFactorKernel _multiply_scale_kernel;
+ CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to
+ // add bias in
+ // CLFullyConnectedHybridLayer
+ CLTensor _reshape_weights_output;
+ CLTensor _quantized_input;
+ CLTensor _scale_factor;
+ CLTensor _gemmlowp_output;
+ bool _are_weights_reshaped;
+ bool _accumulate_biases;
+ bool _is_prepared;
+ const ICLTensor *_original_weights;
+};
+}
+#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
new file mode 100644
index 000000000..e65a646dc
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__
+#define __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
+#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/IWeightsManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+namespace arm_compute
+{
+/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls
+ * the following kernels:
+ *
+ * -# @ref CLTransposeKernel
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedLayerReshapeWeightsEx : public ICLSimpleFunction
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[out] output Destination tensor which stores the transposed input tensor. Data type
+ * supported: Same as @p input.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLFullyConnectedLayerReshapeWeightsEx
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[in] output Destination tensor which stores the transposed input tensor. Data type
+ * supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+
+namespace weights_transformations
+{
+/** Basic function to manage the reshape weights generated from @ref
+ * CLFullyConnectedLayerReshapeWeightsEx */
+class CLFullyConnectedLayerReshapeWeightsExManaged : public ITransformWeights
+{
+public:
+ // Inherited method override
+ void run() override
+ {
+ _output.allocator()->allocate();
+ _func.run();
+ _reshape_run = true;
+ }
+
+ // Inherited method override
+ void release() override { _output.allocator()->free(); }
+
+ // Inherited method override
+ ICLTensor *get_weights() override { return &_output; }
+
+ // Inherited method override
+ uint32_t uid() override { return _uid; }
+
+ /** Configures the @ref CLFullyConnectedLayerReshapeWeightsEx function
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ */
+ void configure(const ICLTensor *input) { _func.configure(input, &_output); }
+
+private:
+ static constexpr uint32_t _uid = 0x0;
+ CLTensor _output{};
+ CLFullyConnectedLayerReshapeWeightsEx _func{};
+};
+} // namespace weights_transformations
+
+/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following
+ * OpenCL kernels:
+ *
+ * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref CLFullyConnectedLayerReshapeWeightsEx (if @p are_weights_reshaped is set to false and
+ * transpose_weights is set to true ) (called once)
+ * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized
+ * asymmetric)
+ * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref
+ * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
+ * not equal to nullptr)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class CLFullyConnectedLayerEx : public IFunction
+{
+public:
+ /** Constructor */
+ CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr,
+ IWeightsManager *weights_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedLayerEx(const CLFullyConnectedLayerEx &) = delete;
+ /** Default move constructor */
+ CLFullyConnectedLayerEx(CLFullyConnectedLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedLayerEx &operator=(const CLFullyConnectedLayerEx &) = delete;
+ /** Default move assignment operator */
+ CLFullyConnectedLayerEx &operator=(CLFullyConnectedLayerEx &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
+ * multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
+ ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLFullyConnectedLayerEx
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor info. Its shape should be equal to the output of a
+ * matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+
+ // Inherited methods override
+ void run() override;
+ void prepare() override;
+
+private:
+ void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+ void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+ void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ CLConvertFullyConnectedWeights _convert_weights;
+ weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed;
+ weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged
+ _reshape_weights_managed_function;
+ CLFlattenLayer _flatten_layer;
+ CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function;
+ CLGEMM _mm_gemm;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ CLTensor _flatten_output;
+ CLTensor _converted_weights_output;
+ CLTensor _reshape_weights_output;
+ bool _are_weights_converted;
+ bool _are_weights_reshaped;
+ bool _is_fc_after_conv;
+ bool _is_quantized;
+ bool _is_prepared;
+ const ICLTensor *_original_weights;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
new file mode 100644
index 000000000..289ab167f
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        CLFullyConnectedReshapingLayer.h
+ * @brief       This file contains CLFullyConnectedReshapingLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__
+#define __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__
+
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
+#include <arm_compute/runtime/IMemoryManager.h>
+
+namespace arm_compute
+{
+/**
+ * @brief Class to run FullyConnected Layer after reshaping input tensor
+ */
+class CLFullyConnectedReshapingLayer : public arm_compute::IFunction
+{
+public:
+ enum class KernelType
+ {
+ GENERAL, //< General FC
+ PREPROCESSED_WEIGHTS //< Weights are constants so it can be preprocessed
+ };
+
+public:
+ CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
+ _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] input The source tensor
+ * @param[in] weights The tensor that is filled with weight values
+ * @param[in] biases The tensor that is filled with biase values
+ * @param[in] output The destination tensor
+ * @param[in] needs_reshape Whether it needs to be reshaped or not
+ * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true.
+ * @return N/A
+ */
+ void configure(const arm_compute::ICLTensor *input, const arm_compute::ICLTensor *weights,
+ const arm_compute::ICLTensor *biases, arm_compute::ICLTensor *output,
+ bool needs_reshape, const arm_compute::TensorShape &reshape,
+ KernelType kernel_type);
+
+public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
+ void run(void) override;
+ /**
+ * @brief Prepare the operation
+ * @return N/A
+ */
+ void prepare(void) override;
+
+private:
+ const arm_compute::ICLTensor *_input;
+ const arm_compute::ICLTensor *_weights;
+ const arm_compute::ICLTensor *_biases;
+ arm_compute::ICLTensor *_output;
+
+ // buffer for reshaping input tensor
+ arm_compute::CLTensor _cl_buffer;
+
+private:
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ std::unique_ptr<arm_compute::IFunction> _cl_fc;
+ CLReshapeLayer _cl_reshape;
+ bool _needs_reshape;
+};
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_CL_FULLY_CONNECTED_RESHAPING_LAYER_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
new file mode 100644
index 000000000..b01ec4255
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file CLGatherEx.h
+ * @brief This file contains CLGatherEx class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_CLGATHEREX_H__
+#define __ARM_COMPUTE_CLGATHEREX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to to run @ref CLGatherKernel.
+ */
+class CLGatherEx : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Initialise the kernel's inputs, output and convertion policy.
+ * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] indices An indexes tensor. Data types supported: S32.
+ * @param[out] output The output tensor, Data types supported: same as @p input.
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ * @return N/A
+ */
+ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration
+ * of @ref CLGatherEx
+ * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
+ * @param[in] indices An indexes tensor. Data types supported: S32.
+ * @param[out] output The output tensor, Data types supported: same as @p input.
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
+ const ITensorInfo *output, int axis = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLGATHEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
new file mode 100644
index 000000000..6618f5aa4
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file CLHashtableLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLHashtableLookup class
+ */
+
+#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUP_H__
+#define __ARM_COMPUTE_CLHASHTABLELOOKUP_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform HashtableLookup operation
+ */
+class CLHashtableLookup : public ICLSimpleFunction
+{
+public:
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of
+ * input.
+ * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
+ * (True) or not (False). Data types supported: U8/QASYMM8
+ * @return N/A
+ */
+ void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *intput,
+ ICLTensor *output, ICLTensor *hits);
+};
+}
+#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
new file mode 100644
index 000000000..887e7aaa5
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
+#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to perform a Instance normalization.
+ *
+ * This function runs the following kernels:
+ * -# @ref CLInstanceNormalizationLayerKernelEx
+ */
+class CLInstanceNormalizationLayerEx : public ICLSimpleFunction
+{
+public:
+ /** Default constructor */
+ CLInstanceNormalizationLayerEx();
+ /** Set the input and output tensors.
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will
+ * store the result of the normalization.
+ * Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p
+ * input.
+ * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults
+ * to nullptr
+ * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. Defaults
+ * to nullptr
+ * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ */
+ void configure(ICLTensor *input, ICLTensor *output, ICLTensor *gamma = nullptr,
+ ICLTensor *beta = nullptr, float epsilon = 1e-12f);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLInstanceNormalizationLayerEx.
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported:
+ * NHWC, NCHW
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p
+ * input.
+ * @param[in] gamma (Optional) The scale tensor applied to the normalized tensor. Defaults to
+ * nullptr
+ * @param[in] beta (Optional) The offset tensor applied to the normalized tensor. Defaults to
+ * nullptr
+ * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr,
+ float epsilon = 1e-12f);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
new file mode 100644
index 000000000..8ec9aa307
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLNEG_H__
+#define __ARM_COMPUTE_CLNEG_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+class CLNeg : public ICLSimpleFunction
+{
+public:
+ /** Initialise the function's source and destination.
+ *
+ * @param[in] input Source tensor. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(ICLTensor *input, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLNEG_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h
new file mode 100644
index 000000000..2bbfca821
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLONEHOT_H__
+#define __ARM_COMPUTE_CLONEHOT_H__
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+namespace arm_compute
+{
+class ICLTensor;
+/** Basic function to run @ref CLOneHotKernel */
+class CLOneHot : public IFunction
+{
+public:
+ /** Constructor */
+ CLOneHot();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLOneHot(const CLOneHot &) = delete;
+ /** Default move constructor */
+ CLOneHot(CLOneHot &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLOneHot &operator=(const CLOneHot &) = delete;
+ /** Default move assignment operator */
+ CLOneHot &operator=(CLOneHot &&) = default;
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * value must be in range [-indices.rank , indices.rank)
+ */
+ void configure(const ICLTensor *indices, const ICLTensor *on_value, const ICLTensor *off_value,
+ ICLTensor *output, int depth, int axis = -1);
+ /** Initialise the kernel's inputs and outputs with off_value being constant
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] off_value The PixelValue for off value. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * value must be in range [-indices.rank , indices.rank)
+ */
+ void configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output,
+ PixelValue off_value, int depth, int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLOneHotKernel
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[in] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * value must be in range [-indices.rank , indices.rank)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *off_value, const ITensorInfo *output, int depth,
+ int axis = -1);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLMemsetKernel _memset_kernel; /**< Memset kernel */
+ CLOneHotKernel _onehot_kernel; /**< OneHot kernel */
+ bool _has_to_memset;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLONEHOT_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
new file mode 100644
index 000000000..bb852e404
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file CLReduceOperation.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLReduceOperation class
+ */
+
+#ifndef __ARM_COMPUTE_CLREDUCEOPERATION_H__
+#define __ARM_COMPUTE_CLREDUCEOPERATION_H__
+
+#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to perform ReduceOperation
+ */
+class CLReduceOperation : public IFunction
+{
+public:
+ /**
+ * @brief Construct a new ReduceOperation object
+ */
+ CLReduceOperation(std::shared_ptr<IMemoryManager> memory_manager);
+
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] input Source tensor. Data types supported: U8/S32/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[in] op Reduce operation to perform.
+ * @return N/A
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis,
+ bool keep_dims, ReductionOperation op);
+
+ /**
+ * @brief Static function to check if given info will lead to a valid configuration of @ref
+ * CLReduceOperation.
+ * @param[in] input Source tensor info. Data types supported: U8/S32/F32
+ * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[in] op Reduce operation to perform.
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const std::set<uint32_t> &axis, bool keep_dims,
+ const ReductionOperation &op);
+
+ /**
+ * @brief Run the OpenCL kernel for this operation
+ * @return N/A
+ */
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ ICLTensor *_input;
+ ICLTensor *_output;
+ std::set<uint32_t> _axis;
+ bool _keep_dims;
+
+ std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
+ std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr};
+ CLReshapeLayer _reshape;
+};
+}
+#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h
new file mode 100644
index 000000000..bb741d98d
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSPLITVEX__
+#define __ARM_COMPUTE_CLSPLITVEX__
+
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+#include "arm_compute/core/Types.h"
+#include <vector>
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLSplitVKernel */
+class CLSplitVEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLSplitVEx();
+ /** Configure the split CL kernel
+ *
+ * @param[in] input The input tensor to split. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] size_splits A 1-D tensor containing the number of tensor values per split
+ * @param[out] outputs A vector containing the output tensor. Data types supported: Same as @p
+ * input
+ * The output tensors should match the input tensor dimensions for all
+ * shape dimensions apart
+ * from the split dimension.
+ * @param[in] split_dim Integer value representing the input tensor dimension along which to
+ * split
+ * @param[in] num_splits Number of splits
+ */
+ void configure(const ICLTensor *input, const ICLTensor *size_splits, uint32_t split_dim,
+ const std::vector<ICLTensor *> &outputs, unsigned int num_splits);
+
+ void run() override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_size_splits;
+ std::vector<ICLTensor *> _outputs;
+ unsigned int _num_splits;
+ std::vector<CLSlice> _slice_functions;
+};
+}
+#endif /* __ARM_COMPUTE_CLSPLITVEX__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
new file mode 100644
index 000000000..e301a5152
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file CLTopKV2.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLTopKV2 class
+ */
+#ifndef __ARM_COMPUTE_CLTOPK_V2_H__
+#define __ARM_COMPUTE_CLTOPK_V2_H__
+
+#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h"
+
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/**
+ * @brief Class to execute TopKV2 operation.
+ */
+class CLTopKV2 : public IFunction
+{
+public:
+ /**
+ * @brief Construct a new CLTopKV2 object
+ */
+ CLTopKV2();
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLTopKV2(const CLTopKV2 &) = delete;
+
+ /**
+ * @brief Prevent instances of this class from being copied (As this class contains pointers)
+ */
+ CLTopKV2 &operator=(const CLTopKV2 &) = delete;
+
+ /**
+ * @brief Construct a new CLTopKV2 object by using copy constructor
+ * @param[in] CLTopKV2 object to move
+ */
+ CLTopKV2(CLTopKV2 &&) = default;
+
+ /**
+ * @brief Assign a CLTopKV2 object.
+ * @param[in] CLTopKV2 object to assign. This object will be moved.
+ */
+ CLTopKV2 &operator=(CLTopKV2 &&) = default;
+
+ /**
+ * @brief Initialise the kernel's inputs and outputs.
+ * @param[in] input Input image. Data types supported: U8/S16/F32.
+ * @param[in] k The value of `k`.
+ * @param[out] values Top k values. Data types supported: S32 if input type is U8/S16, F32 if
+ * input type is F32.
+ * @param[out] indices Indices related to top k values. Data types supported: S32 if input type
+ * is U8/S16, F32 if input type is F32.
+ * @return N/A
+ */
+ void configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices,
+ int total_bits = 32, int bits = 4);
+
+ /**
+ * @brief Run the kernels contained in the function
+ * Depending on the value of the following environment variables it works differently:
+ * - If the value of environment variable "ACL_TOPKV2" == "GPU_SINGLE",
+ * quick sort on GPU is used.
+ * - If the value of environment variable "ACL_TOPKV2" == ""GPU"",
+ * radix sort on GPU is used.
+ * - For other value, TopKV2 runs on CPU
+ * @return N/A
+ */
+ void run() override;
+
+private:
+ void run_on_cpu();
+ void run_on_gpu();
+ void run_on_gpu_single_quicksort();
+
+ uint32_t _k;
+ uint32_t _total_bits;
+ uint32_t _bits;
+ uint32_t _radix;
+ uint32_t _hist_buf_size;
+ uint32_t _glob_sum_buf_size;
+ uint32_t _n;
+
+ ICLTensor *_input;
+ ICLTensor *_values;
+ ICLTensor *_indices;
+
+ cl::Buffer _qs_idx_buf;
+ cl::Buffer _qs_temp_buf;
+ cl::Buffer _hist_buf;
+ cl::Buffer _glob_sum_buf;
+ cl::Buffer _temp_buf;
+ cl::Buffer _first_negative_idx_buf;
+ cl::Buffer _in_key_buf;
+ cl::Buffer _out_key_buf;
+ cl::Buffer _in_ind_buf;
+ cl::Buffer _out_ind_buf;
+
+ cl::Buffer *_p_in_key_buf;
+ cl::Buffer *_p_out_key_buf;
+ cl::Buffer *_p_in_ind_buf;
+ cl::Buffer *_p_out_ind_buf;
+// Disable GPU implementation
+// TODO Enable GPU implementation with verification, or remove code
+// Invalid result on GPU
+#if 0
+ CLTopKV2Single _qs_kernel;
+ CLTopKV2Init _init_kernel;
+ CLRadixSortHistogram _hist_kernel;
+ CLRadixSortScanHistogram _scan_hist_kernel;
+ CLRadixSortGlobalScanHistogram _glob_scan_hist_kernel;
+ CLRadixSortPasteHistogram _paste_hist_kernel;
+ CLRadixSortReorder _reorder_kernel;
+ CLTopKV2FindFirstNegative _find_first_negative_kernel;
+ CLTopKV2ReorderNegatives _reorder_negatives_kernel;
+ CLTopKV2Store _store_kernel;
+#endif
+};
+}
+#endif // __ARM_COMPUTE_CLTOPK_V2_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
new file mode 100644
index 000000000..5fb102e47
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic function to compute the deconvolution layer. This function calls the following OpenCL
+ * kernels/functions:
+ *
+ * -# @ref CLGEMMDeconvolutionLayer
+ * -# @ref CLDirectTransposeConvLayer
+ */
+class CLTransposeConvLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same
+ * as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this
+ * is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ */
+ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo());
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
+ * an optional 4th dimension for batch of inputs. Data types supported:
+ * QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported:
+ * Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as
+ * @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+
+ static DeconvolutionMethod
+ get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info);
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ std::unique_ptr<IFunction> _function;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
new file mode 100644
index 000000000..efc296d6c
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
+#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
+
+#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
+#include <arm_compute/runtime/NEON/functions/NECastBool.h>
+#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
+#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
+#include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h>
+#include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h>
+#include <arm_compute/runtime/NEON/functions/NEOneHot.h>
+#include <arm_compute/runtime/NEON/functions/NEReduceSum.h>
+#include <arm_compute/runtime/NEON/functions/NEReduceOperation.h>
+#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h>
+
+#endif // __ARM_COMPUTE_NEFUNCTIONSEX_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
new file mode 100644
index 000000000..026d30098
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+
+#include "arm_compute/core/TypesEx.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEBinaryLogicalOperationKernel.
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/U8.
+ * @note The function performs a binary logical operation between two tensors.
+ */
+class NEBinaryLogicalOperation : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/U8.
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ * @param[in] op Binary Logical Operation to be performed.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output, BinaryLogicalOperation op);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] op Binary Logical Operation to be performed.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output, BinaryLogicalOperation op);
+};
+
+/** Basic function to run @ref NEBinaryLogicalOperationKernel
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/U8.
+ * @note The function performs a binary logical operation between two tensors.
+ */
+template <BinaryLogicalOperation op> class NEBinaryLogicalOperationStatic : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/U8
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output);
+};
+
+/** Basic function to run equal comparison. */
+using NELogicalAnd = NEBinaryLogicalOperationStatic<BinaryLogicalOperation::AND>;
+/** Basic function to run not equal comparison. */
+using NELogicalOr = NEBinaryLogicalOperationStatic<BinaryLogicalOperation::OR>;
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h
new file mode 100644
index 000000000..c8b08af8d
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECASTBOOL_H__
+#define __ARM_COMPUTE_NECASTBOOL_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/**
+ * @brief Class to run @ref NECastBoolKernel.
+ */
+class NECastBool : public INESimpleFunction
+{
+public:
+ /** Initialize the function's source, destination
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - U8 -> U8, S8, U16, S16, U32, S32, F32, F16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NECastBool
+ *
+ * @param[in] input Source tensor info. Data types supported: U8.
+ * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NECASTBOOL_H__*/
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
new file mode 100644
index 000000000..63f7714aa
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file NEEmbeddingLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::NEEmbeddingLookup class
+ */
+
+#ifndef __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__
+#define __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class ITensor;
+
+/**
+ * @brief Class to perform EmbeddingLookup operation
+ */
+class NEEmbeddingLookup : public INESimpleFunctionNoBorder
+{
+public:
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of
+ * input. Data types supported: S32.
+ * @return N/A
+ */
+ void configure(const ITensor *input, ITensor *output, const ITensor *lookups);
+ /** Static function to check if given info will lead to a valid configuration of @ref NECopy
+ *
+ * @param[in] input Source tensor info. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ * @param[in] output Lookups tensor info. Data types supported: S32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *lookups);
+};
+}
+#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
new file mode 100644
index 000000000..56548a479
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDHYBRIDLAYER_H__
+#define __ARM_COMPUTE_NEFULLYCONNECTEDHYBRIDLAYER_H__
+
+#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
+#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls
+ * the following kernels:
+ *
+ * -# @ref NETransposeKernel
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class NEFullyConnectedHybridLayerReshapeWeights : public INESimpleFunctionNoBorder
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEFullyConnectedHybridLayerReshapeWeights
+ *
+ * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported:
+ * QASYMM8/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+
+/** Basic function to compute a Fully Connected layer on NEON. This function calls the following
+ * NEON kernels:
+ * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref NEFullyConnectedHybridLayerReshapeWeights (if @p are_weights_reshaped is set to false
+ * and transpose_weights is set to true ) (called once)
+ * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized
+ * asymmetric)
+ * -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref
+ * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
+ * not equal to nullptr)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class NEFullyConnectedHybridLayer : public IFunction
+{
+public:
+ /** Constructor */
+ NEFullyConnectedHybridLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFullyConnectedHybridLayer(const NEFullyConnectedHybridLayer &) = delete;
+ /** Default move constructor */
+ NEFullyConnectedHybridLayer(NEFullyConnectedHybridLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFullyConnectedHybridLayer &operator=(const NEFullyConnectedHybridLayer &) = delete;
+ /** Default move assignment operator */
+ NEFullyConnectedHybridLayer &operator=(NEFullyConnectedHybridLayer &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: S8.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
+ * multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ */
+ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases,
+ ITensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEFullyConnectedHybridLayer
+ *
+ * @param[in] input Source tensor info. Data type supported: F16/F32.
+ * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: S8.
+ * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor info. Its shape should be equal to the output of a
+ * matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+
+ // Inherited methods override
+ void run() override;
+ void prepare() override;
+
+private:
+ void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+
+ MemoryGroup _memory_group;
+ NEFullyConnectedHybridLayerReshapeWeights _reshape_weights_function;
+ NEQuantizationSymmetricKernel _quant_input_kernel;
+ NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ NEMultiplyScaleFactorKernel _multiply_scale_kernel;
+ NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
+ Tensor _reshape_weights_output;
+ Tensor _quantized_input;
+ Tensor _scale_factor;
+ Tensor _gemmlowp_output;
+ const ITensor *_original_weights;
+ bool _are_weights_reshaped;
+ bool _accumulate_biases;
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDHYBRIDLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
new file mode 100644
index 000000000..8f98f220a
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__
+#define __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+/** Basic function to compute a Fully Connected layer on NEON. This function calls the following
+ * NEON kernels:
+ * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and
+ * transpose_weights is set to true ) (called once)
+ * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized
+ * asymmetric)
+ * -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref
+ * NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
+ * not equal to nullptr)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ * @note The difference from NEFullyConnectedLayer is that this class supports weights as input
+ * with performance loss.
+ */
+class NEFullyConnectedLayerEx : public IFunction
+{
+public:
+ /** Constructor */
+ NEFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFullyConnectedLayerEx(const NEFullyConnectedLayerEx &) = delete;
+ /** Default move constructor */
+ NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEFullyConnectedLayerEx &operator=(const NEFullyConnectedLayerEx &) = delete;
+ /** Default move assignment operator */
+ NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
+ * multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ */
+ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases,
+ ITensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEFullyConnectedLayerEx
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
+ * If this function is called after a Convolution Layer, the (transposed)
+ * weights will have as many rows as the product of the first 3 input's dimensions.
+ * If it is called after another FullyConnected Layer, the (transposed)
+ * weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
+ * @param[out] output Destination tensor info. Its shape should be equal to the output of a
+ * matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the
+ * function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is
+ * called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+
+ // Inherited methods override
+ void run() override;
+ void prepare() override;
+
+private:
+ void configure_fc_fc(const ITensor *input, const ITensor *weights, ITensor *output);
+ void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
+ void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+
+ MemoryGroup _memory_group;
+ NEFlattenLayerKernel _flatten_kernel;
+ NEConvertFullyConnectedWeights _convert_weights;
+ NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
+ NEGEMM _mm_gemm;
+ NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
+ NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
+ Tensor _flatten_output;
+ Tensor _gemmlowp_output;
+ Tensor _converted_weights_output;
+ Tensor _reshape_weights_output;
+ const ITensor *_original_weights;
+ bool _are_weights_converted;
+ bool _are_weights_reshaped;
+ bool _is_fc_after_conv;
+ bool _accumulate_biases;
+ bool _is_quantized;
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
new file mode 100644
index 000000000..18cb61bf9
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        NEFullyConnectedReshapingLayer.h
+ * @brief       This file contains NEFullyConnectedReshapingLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__
+#define __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__
+
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+#include <arm_compute/runtime/IMemoryManager.h>
+#include <arm_compute/runtime/Tensor.h>
+
+namespace arm_compute
+{
+/**
+ * @brief Class to run FullyConnected Layer after reshaping input tensor
+ */
+class NEFullyConnectedReshapingLayer : public arm_compute::IFunction
+{
+public:
+ enum class KernelType
+ {
+ GENERAL, //< General FC
+ PREPROCESSED_WEIGHTS //< Weights are constants so it can be preprocessed
+ };
+
+public:
+ NEFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
+ : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
+ _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] input The source tensor
+ * @param[in] weights The tensor that is filled with weight values
+ * @param[in] biases The tensor that is filled with biase values
+ * @param[in] output The destination tensor
+ * @param[in] needs_reshape Whether it needs to be reshaped or not
+ * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true.
+ * @param[in] kernel_type The kernel type for actual FullyConnected layer
+ * @return N/A
+ */
+ void configure(const arm_compute::ITensor *input, const arm_compute::ITensor *weights,
+ const arm_compute::ITensor *biases, arm_compute::ITensor *output,
+ bool needs_reshape, const arm_compute::TensorShape &reshape,
+ KernelType kernel_type);
+
+public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
+ void run(void) override;
+ /**
+ * @brief Prepare the operation
+ * @return N/A
+ */
+ void prepare(void) override;
+
+private:
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ const arm_compute::ITensor *_input;
+ const arm_compute::ITensor *_weights;
+ const arm_compute::ITensor *_biases;
+ arm_compute::ITensor *_output;
+
+ // buffer for reshaping input tensor
+ arm_compute::Tensor _neon_buffer;
+
+private:
+ std::unique_ptr<arm_compute::IFunction> _neon_fc;
+ NEReshapeLayer _neon_reshape;
+ bool _needs_reshape;
+};
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
new file mode 100644
index 000000000..155a1b837
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEGATHEREX_H__
+#define __ARM_COMPUTE_NEGATHEREX_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEGatherKernelEx */
+class NEGatherEx : public INESimpleFunctionNoBorder
+{
+public:
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported:
+ * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following type: U32/S32. Each value Must be in range [0, input.shape[@p axis])
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ */
+ void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEGatherKernelEx
+ *
+ * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported:
+ * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32. Each value Must be in range [0, input.shape[@p axis])
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
+ const ITensorInfo *output, int axis);
+};
+
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_NEGATHEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
new file mode 100644
index 000000000..521a05ad9
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * @file NEHashtableLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::NEHashtableLookup class
+ */
+
+#ifndef __ARM_COMPUTE_NEHASHTABLELOOKUP_H__
+#define __ARM_COMPUTE_NEHASHTABLELOOKUP_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class ITensor;
+
+/**
+ * @brief Class to perform HashtableLookup operation
+ */
+class NEHashtableLookup : public INESimpleFunctionNoBorder
+{
+public:
+ /**
+ * @brief Set the input and output tensors.
+ * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of
+ * input. Data types supported: S32
+ * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input Source tensor.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
+ * (True) or not (False). Data types supported: U8/QASYMM8
+ * @return N/A
+ */
+ void configure(const ITensor *lookups, const ITensor *keys, const ITensor *input, ITensor *output,
+ ITensor *hits);
+ /** Static function to check if given info will lead to a valid configuration of @ref NECopy
+ *
+ * @param[in] lookups Lookups 1D tensor info.
+ * Data types supported: S32
+ * @param[in] keys Keys 1D tensor info. keys and input pair represent a map.
+ * Data types supported: S32
+ * @param[in] input Source tensor info.
+ * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
+ * input.
+ * @param[in] hits Hits 1D tensor info. A boolean tensor that indicates whether the lookup
+ * hits (True) or not (False). Data types supported: U8/QASYMM8
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys,
+ const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *hits);
+};
+}
+#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
new file mode 100644
index 000000000..18e813923
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__
+#define __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__
+
+#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to perform a Instance normalization.
+ *
+ * This function runs the following kernels:
+ * -# @ref NEInstanceNormalizationLayerKernelEx
+ */
+class NEInstanceNormalizationLayerEx : public IFunction
+{
+public:
+ /** Constructor */
+ NEInstanceNormalizationLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Set the input and output tensors.
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will
+ * store the result of the normalization.
+ * Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p
+ * input.
+ * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor.
+ * Defaults to 1.0
+ * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor.
+ * Defaults to 0.0
+ * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ */
+ void configure(ITensor *input, ITensor *output, ITensor *gamma, ITensor *beta,
+ float epsilon = 1e-12f);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEInstanceNormalizationLayer.
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported:
+ * NHWC, NCHW
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p
+ * input.
+ * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults
+ * to 1.0
+ * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor.
+ * Defaults to 0.0
+ * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *gamma = nullptr, const ITensorInfo *beta = nullptr,
+ float epsilon = 1e-12f);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ NEInstanceNormalizationLayerKernelEx _normalization_kernel;
+ bool _is_nchw;
+ NEPermute _permute_input;
+ NEPermute _permute_output;
+ Tensor _permuted_input;
+ Tensor _permuted_output;
+};
+}
+#endif /* __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
new file mode 100644
index 000000000..b2ea6270f
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEONEHOT_H__
+#define __ARM_COMPUTE_NEONEHOT_H__
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+/** Basic function to run @ref NEOneHotKernel */
+class NEOneHot : public INESimpleFunctionNoBorder
+{
+public:
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up
+ * to 3. Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
+ */
+ void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
+ const ITensor *off_value, ITensor *output, int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEOneHotKernel
+ *
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank:
+ * up to 3. Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *depth,
+ const ITensorInfo *on_value, const ITensorInfo *off_value,
+ const ITensorInfo *output, int axis = -1);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEONEHOT_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
new file mode 100644
index 000000000..91eec815c
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEON_REDUCE_OPERATION_H__
+#define __ARM_COMPUTE_NEON_REDUCE_OPERATION_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to perform reduce operation */
+class NEReduceOperation : public IFunction
+{
+public:
+ /** Constructor */
+ NEReduceOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] reduction_axis Reduction axis vector.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] op Reduce operation to perform.
+ */
+ void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, ITensor *output,
+ ReductionOperation op);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEReduceOperation
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] reduction_axis Reduction axis vector.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] op Reduce operation to perform.
+ *
+ * @return A status
+ */
+ static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
+ bool keep_dims, const ITensorInfo *output, ReductionOperation op);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ std::vector<NEReductionOperation> _reduction_kernels;
+ std::vector<Tensor> _reduced_outs;
+ NEReshapeLayer _reshape;
+ unsigned int _reduction_ops;
+ bool _keep_dims;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_REDUCE_OPERATION_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
new file mode 100644
index 000000000..48b416923
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEON_REDUCE_SUM_H__
+#define __ARM_COMPUTE_NEON_REDUCE_SUM_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to perform reduce operation */
+class NEReduceSum : public IFunction
+{
+public:
+ /** Constructor */
+ NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] reduction_axis Reduction axis vector.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ */
+ void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
+ ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReduceSum
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] reduction_axis Reduction axis vector.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input
+ *
+ * @return A status
+ */
+ static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
+ bool keep_dims, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ std::vector<NEReductionOperation> _reduction_kernels;
+ std::vector<Tensor> _reduced_outs;
+ NEReshapeLayer _reshape;
+ unsigned int _reduction_ops;
+ bool _keep_dims;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_REDUCE_SUM_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
new file mode 100644
index 000000000..24ff5dac9
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReverse.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Function to run the deconvolution layer.
+ *
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perfrom a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the
+ * input, pad is the amount of padding and finaly a is a user
+ * specified value where a < stride - 1 that increases the padding top and right of the input image.
+ *
+ * The relation between input to output is as follows:
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
+ *
+ * where
+ * width is the size of the first input dimension.
+ * height is the size of the second input dimension.
+ * width_output is the size of the first output dimension.
+ * height_output is the size of the second output dimension.
+ * kernel_x and kernel_y are the convolution sizes in x and y.
+ * stride_x and stride_y is the input stride of the first and second dimension.
+ *
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
+ *
+ * This function calls the following NEON kernels/functions:
+ *
+ * -# @ref CPPUpsampleEx
+ * -# @ref NEConvolutionLayer
+ * -# @ref NEPermute
+ * -# @ref NEReverse
+ *
+ */
+class NETransposeConvLayer : public IFunction
+{
+public:
+ /** Constructor */
+ NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETransposeConvLayer(const NETransposeConvLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete;
+ /** Allow instances of this class to be moved */
+ NETransposeConvLayer(NETransposeConvLayer &&) = default;
+ /** Allow instances of this class to be moved */
+ NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default;
+ /** Default destructor */
+ virtual ~NETransposeConvLayer() = default;
+
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
+ * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16
+ * for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the @p
+ * input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is
+ * decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ *
+ */
+ void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NETransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
+ * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the @p
+ * input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is
+ * decribed in @ref PadStrideInfo.
+ * @param[in] innvalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, const ITensorInfo *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom);
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ MemoryGroup _memory_group;
+ NEConvolutionLayer _conv_f;
+ CPPUpsample _upsample_f;
+ NEReverse _flip_weights;
+ Tensor _scaled_output;
+ Tensor _weights_flipped;
+ Tensor _flip_axis;
+ const ITensor *_original_weights;
+ ITensor *_input;
+ PadStrideInfo _info;
+ bool _is_prepared;
+};
+} // arm_compute
+#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */