summaryrefslogtreecommitdiff
path: root/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions')
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h81
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h98
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h54
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h83
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h47
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h114
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h83
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h82
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h162
9 files changed, 804 insertions, 0 deletions
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
new file mode 100644
index 000000000..604cd93c4
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
+#define __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to perform reduce min/max operation */
+template <ReductionOperation op> class NEArgMinMaxStatic : public IFunction
+{
+public:
+ /** Constructor */
+ NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] axis Reduction axis.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ */
+ void configure(ITensor *input, int axis, ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMax
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] axis Reduction axis.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input
+ *
+ * @return A status
+ */
+ static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ NEArgMinMaxLayer _reduction_kernel;
+ Tensor _reduced_out;
+ NEReshapeLayer _reshape;
+};
+
+/** Basic function to run arg max. */
+using NEArgMax = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
+/** Basic function to run arg min. */
+using NEArgMin = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
new file mode 100644
index 000000000..2a624656d
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+
+#include "arm_compute/core/TypesEx.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEBinaryLogicalOperationKernel.
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/U8.
+ * @note The function performs a binary logical operation between two tensors.
+ */
+class NEBinaryLogicalOperation : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/U8.
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ * @param[in] op Binary Logical Operation to be performed.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output, BinaryLogicalOperation op);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] op Binary Logical Operation to be performed.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output, BinaryLogicalOperation op);
+};
+
+/** Basic function to run @ref NEBinaryLogicalOperationKernel
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/U8.
+ * @note The function performs a binary logical operation between two tensors.
+ */
+template <BinaryLogicalOperation op> class NEBinaryLogicalOperationStatic : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/U8
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output);
+};
+
+/** Basic function to run equal comparison. */
+using NELogicalAnd = NEBinaryLogicalOperationStatic<BinaryLogicalOperation::AND>;
+/** Basic function to run not equal comparison. */
+using NELogicalOr = NEBinaryLogicalOperationStatic<BinaryLogicalOperation::OR>;
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h
new file mode 100644
index 000000000..f0c8ecdb5
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
+#define __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to perform negative on an input tensor. */
+class NENegLayer : public INESimpleFunction
+{
+public:
+ /** Initialize the function
+ *
+ * @param[in] input Input tensor. Data types supported: F16/F32/S32.
+ * @param[out] output Output tensor. Data types supported: same as @p input.
+ */
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer
+ *
+ * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
new file mode 100644
index 000000000..21352ff8b
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        NEFullyConnectedReshapingLayer.h
+ * @brief       This file contains NEFullyConnectedReshapingLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__
+#define __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__
+
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
+#include <arm_compute/runtime/IMemoryManager.h>
+
+namespace arm_compute
+{
+/**
+ * @brief Class to run FullyConnected Layer after reshaping input tensor
+ */
+class NEFullyConnectedReshapingLayer : public arm_compute::IFunction
+{
+public:
+ NEFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _neon_buffer{},
+ _neon_fc{memory_manager}, _neon_reshape{}, _needs_reshape(false)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] input The source tensor
+ * @param[in] weights The tensor that is filled with weight values
+ * @param[in] biases The tensor that is filled with biase values
+ * @param[in] output The destination tensor
+ * @param[in] needs_reshape Whether it needs to be reshaped or not
+ * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true.
+ * @return N/A
+ */
+ void configure(const arm_compute::ITensor *input, const arm_compute::ITensor *weights,
+ const arm_compute::ITensor *biases, arm_compute::ITensor *output,
+ bool needs_reshape, const arm_compute::TensorShape &reshape);
+
+public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
+ void run(void) override;
+
+private:
+ const arm_compute::ITensor *_input;
+ const arm_compute::ITensor *_weights;
+ const arm_compute::ITensor *_biases;
+ arm_compute::ITensor *_output;
+
+ // buffer for reshaping input tensor
+ arm_compute::Tensor _neon_buffer;
+
+private:
+ arm_compute::NEFullyConnectedLayer _neon_fc;
+ NEReshapeLayer _neon_reshape;
+ bool _needs_reshape;
+};
+} // namespace arm_compute
+
+#endif // __ARM_COMPUTE_NE_FULLY_CONNECTED_RESHAPING_LAYER_H__
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h
new file mode 100644
index 000000000..5664c57cb
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEPReLU.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEPRELU_H__
+#define __ARM_COMPUTE_NEPRELU_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEPReLUKernel */
+class NEPReLU : public INESimpleFunctionNoBorder
+{
+public:
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input. Data types supported: QASYMM8/F32.
+ * @param[in] alpha. Data types supported: Same as @p input.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ */
+ void configure(const ITensor *input, const ITensor *alpha, ITensor *output);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEPRELU_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h
new file mode 100644
index 000000000..17c37d806
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NERNNLayerEx.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NERNNLAYER_EX_H__
+#define __ARM_COMPUTE_NERNNLAYER_EX_H__
+
+#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
+#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Basic function to run @ref NERNNLayerEx */
+class NERNNLayerEx : public IFunction
+{
+public:
+ /** Default constructor */
+ NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERNNLayerEx(const NERNNLayerEx &) = delete;
+ /** Default move constructor */
+ NERNNLayerEx(NERNNLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NERNNLayerEx &operator=(const NERNNLayerEx &) = delete;
+ /** Default move assignment operator */
+ NERNNLayerEx &operator=(NERNNLayerEx &&) = default;
+ /** Initialize the function
+ *
+ * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
+ * types supported: F16/F32
+ * @param[in] weights Weights tensor of shape [input_size, num_units] that
+ * multiplies the input. Data types supported: Same as @p input
+ * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies
+ * the current 'state'. Data types supported: Same as @p input
+ * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same
+ * as @p input
+ * @param[out] output Output tensor of shape [num_units, batch_size]. Data types
+ * supported: Same as @p input
+ * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types
+ * supported: Same as @p input
+ * @param[in] info Activation layer parameter.
+ */
+ void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights,
+ const ITensor *bias, ITensor *hidden_state, ITensor *output,
+ ActivationLayerInfo &info);
+ /** Initialize the function
+ *
+ * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
+ * types supported: F16/F32
+ * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies
+ * the input. Data types supported: Same as @p input
+ * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the
+ * current 'state'. Data types supported: Same as @p input
+ * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p
+ * input
+ * @param[in] output Output tensor of shape [num_units, batch_size]. Data types
+ * supported: Same as @p input
+ * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types
+ * supported: Same as @p input
+ * @param[in] info Activation layer parameter.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
+ const ITensorInfo *hidden_state, const ITensorInfo *output,
+ const ActivationLayerInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ MemoryGroup _memory_group;
+ NEGEMM _gemm_state_f;
+ NEArithmeticAdditionKernel _add_kernel;
+ NEActivationLayerKernel _activation_kernel;
+ NEFullyConnectedLayer _fully_connected_kernel;
+ NECopyKernel _copy_kernel;
+ Tensor _fully_connected_out;
+ Tensor _gemm_output;
+ Tensor _add_output;
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NERNNLAYER_EX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h
new file mode 100644
index 000000000..7209acf19
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceMeanEx.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
+#define __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to perform reduce operation */
+class NEReduceMeanEx : public IFunction
+{
+public:
+ /** Constructor */
+ NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] reduction_axis Reduction axis vector.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ */
+ void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
+ ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEReduceMeanEx
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] reduction_axis Reduction axis vector.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input
+ *
+ * @return A status
+ */
+ static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
+ bool keep_dims, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ std::unique_ptr<NEReductionOperation[]> _reduction_kernels{nullptr};
+ std::unique_ptr<Tensor[]> _reduced_outs{nullptr};
+ NEReshapeLayer _reshape;
+ unsigned int _reduction_ops;
+ bool _keep_dims;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
new file mode 100644
index 000000000..c028ea658
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEON_REDUCE_SUM_H__
+#define __ARM_COMPUTE_NEON_REDUCE_SUM_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to perform reduce operation */
+class NEReduceSum : public IFunction
+{
+public:
+ /** Constructor */
+ NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] reduction_axis Reduction axis vector.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ */
+ void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
+ ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEReduceSum
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] reduction_axis Reduction axis vector.
+ * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input
+ *
+ * @return A status
+ */
+ static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
+ bool keep_dims, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ std::vector<NEReductionOperation> _reduction_kernels;
+ std::vector<Tensor> _reduced_outs;
+ NEReshapeLayer _reshape;
+ unsigned int _reduction_ops;
+ bool _keep_dims;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_REDUCE_SUM_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
new file mode 100644
index 000000000..a50b9ea60
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+
+#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Function to run the deconvolution layer.
+ *
+ * Transpose convolution Layer is the backward pass of Convolution Layer. First we transform the
+ * input depending on the stride and pad info and then perfrom a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the
+ * input, pad is the amount of padding and finaly a is a user
+ * specified value where a < stride - 1 that increases the padding top and right of the input image.
+ *
+ * The relation between input to output is as follows:
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
+ *
+ * where
+ * width is the size of the first input dimension.
+ * height is the size of the second input dimension.
+ * width_output is the size of the first output dimension.
+ * height_output is the size of the second output dimension.
+ * kernel_x and kernel_y are the convolution sizes in x and y.
+ * stride_x and stride_y is the input stride of the first and second dimension.
+ *
+ * The weights used by Transpose convolution are supposed to be the same as the ones used for
+ * Convolution. Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using the @ref
+ * CPPFlipWeightsKernel.
+ *
+ * This function calls the following NEON kernels/functions:
+ *
+ * -# @ref CPPUpsample
+ * -# @ref NEConvolutionLayer
+ *
+ */
+class NETransposeConvLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETransposeConvLayer(const NETransposeConvLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete;
+ /** Allow instances of this class to be moved */
+ NETransposeConvLayer(NETransposeConvLayer &&) = default;
+ /** Allow instances of this class to be moved */
+ NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default;
+ /** Default destructor */
+ virtual ~NETransposeConvLayer() = default;
+
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
+ * supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the @p
+ * input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is
+ * decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to top edge of the output.
+ *
+ */
+ void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NETransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
+ * supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the @p
+ * input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is
+ * decribed in @ref PadStrideInfo.
+ * @param[in] innvalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to top edge of the output.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, const ITensorInfo *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom);
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ MemoryGroup _memory_group;
+ NEConvolutionLayer _conv_f;
+ CPPUpsampleEx _upsample_f;
+ CPPFlipWeightsKernel _flip_weights;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
+ Tensor _scaled_output;
+ Tensor _weights_flipped;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
+ bool _is_nchw;
+ const ITensor *_original_weights;
+ ITensor *_input;
+ PadStrideInfo _info;
+ bool _is_prepared;
+};
+} // arm_compute
+#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */