summaryrefslogtreecommitdiff
path: root/compute/ARMComputeEx/arm_compute/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'compute/ARMComputeEx/arm_compute/runtime')
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h5
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h109
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h1
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h (renamed from compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h)43
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h95
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h8
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h31
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h7
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h3
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h122
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h130
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h9
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h89
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h92
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h103
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h (renamed from compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h)68
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h2
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h14
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h1
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h3
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h93
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h12
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h4
-rw-r--r--compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h61
33 files changed, 790 insertions, 343 deletions
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
index cfbd13436..664b8b3b1 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/CLFunctionsEx.h
@@ -16,14 +16,19 @@
#ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__
#define __ARM_COMPUTE_CLFUNCTIONSEX_H__
+#include <arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
+#include <arm_compute/runtime/CL/functions/CLCastBool.h>
#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/CL/functions/CLGatherEx.h>
#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
+#include <arm_compute/runtime/CL/functions/CLOneHot.h>
+#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
+#include <arm_compute/runtime/CL/functions/CLSplitVEx.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h>
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h
new file mode 100644
index 000000000..05bcc4075
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMaxLayerEx.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__
+#define __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__
+
+#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernelEx.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+namespace arm_compute
+{
+class ITensorInfo;
+class ICLTensor;
+
+/** Function to calculate the index of the minimum or maximum values in a
+ * tensor based on an axis.
+ *
+ * @note The default data type for an uninitialized output tensor is
+ * signed 32-bit integer (S32). It is the user's responsibility to check
+ * that the results do not overflow because the indices are computed
+ * in unsigned 32-bit (U32).
+ */
+class CLArgMinMaxLayerEx : public IFunction
+{
+public:
+ /** Default Constructor.
+ *
+ * @param[in] memory_manager (Optional) Memory manager.
+ */
+ CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Input source tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[out] output Output source tensor. Data types supported: U32/S32.
+ * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX,
+ * ARG_IDX_MIN
+ */
+ void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLArgMinMaxLayerEx
+ *
+ * @param[in] input Input source tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] axis Axis to find max/min index.
+ * @param[in] output Output source tensor info. Data types supported: U32/S32.
+ * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX,
+ * ARG_IDX_MIN
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output,
+ const ReductionOperation &op);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ CLTensor _not_reshaped_output;
+ std::vector<CLArgMinMaxLayerKernelEx> _reduction_kernels_vector;
+ CLReshapeLayer _reshape_kernel;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLARGMINMAXLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
index 88a9b00ec..fc4322798 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
@@ -43,6 +43,7 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/core/TypesEx.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h
index 7930e4e20..854ddce52 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPOneHotEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLCastBool.h
@@ -15,7 +15,7 @@
*/
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,31 +38,34 @@
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_CPPONEHOT_EX_H__
-#define __ARM_COMPUTE_CPPONEHOT_EX_H__
+/**
+ * @file CLCastBool.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file contains arm_compute::CLCastBool class
+ */
+
+#ifndef ARM_COMPUTE_CLCASTBOOL_H
+#define ARM_COMPUTE_CLCASTBOOL_H
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
-class ITensor;
+class ICLTensor;
-/** Basic function to run @ref CPPOneHot */
-class CPPOneHotEx : public ICPPSimpleFunction
+/**
+ * @brief Class to run @ref CLCastBoolKernel.
+ * This converts the boolean input tensor to the output tensor's type.
+ */
+class CLCastBool : public ICLSimpleFunction
{
public:
- /** Configure the one_hot function
- *
- * @param[in] indices A tensor for indices. Data types supported: S32
- * @param[in] depth A tensor for depth. Data types supported: S32
- * @param[in] on_value A tensor for on_value. Data types supported: F32
- * @param[in] off_value A tensor for off_value. Data types supported: F32
- * @param[out] output A tensor for computed value of one hot operator
- * @param[in] axis An int value for axis
+ /**
+ * @brief Initialise the kernel's input and output
+ * @param[in] input Input tensor. Data types supported: U8
+ * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/F16/F32.
*/
- void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
- const ITensor *off_value, ITensor *output, const int axis);
+ void configure(ICLTensor *input, ICLTensor *output);
};
-}
-#endif /* __ARM_COMPUTE_CPPONEHOT_EX_H__ */
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCASTBOOL_H */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
index 409eaf593..026209f69 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
@@ -106,22 +106,24 @@ public:
CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension.
- * Data type supported: Should match @p input data type, except for
- * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[out] output Output tensor. The output has the same number of dimensions as the
- * @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this
- * is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type,
+ * except for input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
+ * @param[out] output Output tensor.
+ * The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with
+ * @ref CLWeightsReshapeKernel.
*
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
@@ -130,23 +132,24 @@ public:
/** Set the input, weights, biases and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
- * an optional 4th dimension for batch of inputs.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
* Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension.
* Data type supported: Should match @p input data type, except for
- * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
* @param[out] output Output tensor. The output has the same number of dimensions as
- * the @p input.
+ * the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution,
- * this is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
@@ -154,24 +157,26 @@ public:
unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLDirectTransposeConvLayer
+ * CLDirectTransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension.
- * Data type supported: Should match @p input data type, except for input
- * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[in] output Output tensor info. The output has the same number of dimensions as the
- * @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type,
+ * except for input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped
+ * with @ref CLWeightsReshapeKernel.
*
* @return a status
*/
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
index fbee7e40e..b0149cb09 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
@@ -73,5 +73,5 @@ public:
*/
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
index f3266f688..c75ae9a50 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h
@@ -43,14 +43,14 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
{
@@ -182,5 +182,5 @@ private:
bool _is_prepared;
const ICLTensor *_original_weights;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDHYBRIDLAYER_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
index e65a646dc..c08da526a 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
@@ -43,16 +43,14 @@
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
{
@@ -132,9 +130,6 @@ private:
* transpose_weights is set to true ) (called once)
* -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized
* asymmetric)
- * -# @ref CLGEMMMatrixAccumulateBiasesKernel or @ref
- * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is
- * not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
@@ -157,40 +152,36 @@ public:
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed)
- * weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed)
- * weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
+ * weights will have as many rows as the product of the first 3 input's dimensions. If it is
+ * called after another FullyConnected Layer, the (transposed) weights will have as many rows as
+ * the input's first dimension. Data type supported: Same as @p input.
* @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
* @param[out] output Destination tensor. Its shape should be equal to the output of a matrix
* multiplication between:
* - The output of im2col on the input and the (transposed) 2D weights, if the
* function is called after a Convolution Layer
* - The input tensor and the (transposed) 2D weights, if the function is
- * called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
+ * called after another FullyConnected Layer. Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
ICLTensor *output, FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLFullyConnectedLayerEx
+ * CLFullyConnectedLayerEx
*
* @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
* @param[in] weights Weights tensor info. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed)
- * weights will have as many rows as the product of the first 3 input's dimensions.
- * If it is called after another FullyConnected Layer, the (transposed)
- * weights will have as many rows as the input's first dimension.
- * Data type supported: Same as @p input.
+ * weights will have as many rows as the product of the first 3 input's dimensions. If it is
+ * called after another FullyConnected Layer, the (transposed) weights will have as many rows as
+ * the input's first dimension. Data type supported: Same as @p input.
* @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
* @param[out] output Destination tensor info. Its shape should be equal to the output of a
* matrix multiplication between:
* - The output of im2col on the input and the (transposed) 2D weights, if the
* function is called after a Convolution Layer
* - The input tensor and the (transposed) 2D weights, if the function is
- * called after another FullyConnected Layer.
- * Data type supported: Same as @p input.
+ * called after another FullyConnected Layer. Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*
* @return a status
@@ -216,7 +207,7 @@ private:
CLConvertFullyConnectedWeights _convert_weights;
weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed;
weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged
- _reshape_weights_managed_function;
+ _reshape_weights_managed_function;
CLFlattenLayer _flatten_layer;
CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function;
CLGEMM _mm_gemm;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
index 289ab167f..bdb168664 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
@@ -43,8 +43,8 @@ public:
public:
CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
- : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
- _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
+ _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
index b01ec4255..385eb0b2c 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
@@ -47,11 +47,14 @@
#ifndef __ARM_COMPUTE_CLGATHEREX_H__
#define __ARM_COMPUTE_CLGATHEREX_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/**
* @brief Class to to run @ref CLGatherKernel.
@@ -66,7 +69,7 @@ public:
* @param[out] output The output tensor, Data types supported: same as @p input.
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
* @return N/A
- */
+ */
void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
/**
@@ -81,5 +84,5 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *indices,
const ITensorInfo *output, int axis = 0);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGATHEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
index 6618f5aa4..5e172a4c7 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
@@ -78,5 +78,5 @@ public:
void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *intput,
ICLTensor *output, ICLTensor *hits);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
index 887e7aaa5..02ae6d719 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h
@@ -41,11 +41,14 @@
#ifndef __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
#define __ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYEREX_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform a Instance normalization.
*
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h
new file mode 100644
index 000000000..62a36f06d
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLOneHot.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLONEHOT_H__
+#define __ARM_COMPUTE_CLONEHOT_H__
+
+#include "arm_compute/core/CL/kernels/CLOneHotKernel.h"
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Basic function to run @ref CLOneHotKernel */
+class CLOneHot : public IFunction
+{
+public:
+ /** Constructor */
+ CLOneHot();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLOneHot(const CLOneHot &) = delete;
+ /** Default move constructor */
+ CLOneHot(CLOneHot &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLOneHot &operator=(const CLOneHot &) = delete;
+ /** Default move assignment operator */
+ CLOneHot &operator=(CLOneHot &&) = default;
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * Value must be in range [-indices.rank, indices.rank)
+ */
+ void configure(const ICLTensor *indices, const ICLTensor *on_value, const ICLTensor *off_value,
+ ICLTensor *output, int depth, int axis = -1);
+ /** Initialise the kernel's inputs and outputs with off_value being constant
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] off_value The PixelValue for off value. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * Value must be in range [-indices.rank, indices.rank)
+ */
+ void configure(const ICLTensor *indices, const ICLTensor *on_value, ICLTensor *output,
+ PixelValue off_value, int depth, int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLOneHotKernel
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[in] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] depth The depth of the one hot dimension.
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * Value must be in range [-indices.rank, indices.rank)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *on_value,
+ const ITensorInfo *off_value, const ITensorInfo *output, int depth,
+ int axis = -1);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLMemsetKernel _memset_kernel; /**< Memset kernel */
+ CLOneHotKernel _onehot_kernel; /**< OneHot kernel */
+ bool _has_to_memset;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLONEHOT_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
new file mode 100644
index 000000000..ee1879aaa
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPADLAYEREX_H
+#define ARM_COMPUTE_CLPADLAYEREX_H
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/core/CL/kernels/CLPadLayerKernelEx.h"
+#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
+// #include "arm_compute/runtime/CL/functions/CLCopy.h"
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels:
+ *
+ * -# @ref CLPadLayerKernelEx if there is padding to be added
+ * -# @ref CLCopyKernel otherwise
+ */
+class CLPadLayerEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLPadLayerEx();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerEx(const CLPadLayerEx &) = delete;
+ /** Default move constructor */
+ CLPadLayerEx(CLPadLayerEx &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerEx &operator=(const CLPadLayerEx &) = delete;
+ /** Default move assignment operator */
+ CLPadLayerEx &operator=(CLPadLayerEx &&) = default;
+
+ /** Initialize the function
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Initialize the function
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The
+ * pair padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLPadLayerEx.
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair
+ * padding[i] specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p
+ * constant_value using CONSTANT, or reflect the input, either including the border values
+ * (SYMMETRIC) or not (REFLECT).
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ void configure_reflect_mode(ICLTensor *input, ICLTensor *output);
+
+ std::unique_ptr<CLPadLayerKernelEx> _pad_kernel;
+ std::unique_ptr<opencl::kernels::ClCopyKernel> _copy_kernel;
+ bool _perform_pad;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPADLAYEREX_H */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
index 7dba84b12..45eb72bef 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
@@ -48,7 +48,7 @@
#define __ARM_COMPUTE_CLREDUCEOPERATION_H__
#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
-#include "arm_compute/core/TypesEx.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
@@ -82,7 +82,7 @@ public:
* @return N/A
*/
void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis,
- bool keep_dims, ReduceOperation op);
+ bool keep_dims, ReductionOperation op);
/**
* @brief Static function to check if given info will lead to a valid configuration of @ref
@@ -96,7 +96,8 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const std::set<uint32_t> &axis, bool keep_dims, const ReduceOperation &op);
+ const std::set<uint32_t> &axis, bool keep_dims,
+ const ReductionOperation &op);
/**
* @brief Run the OpenCL kernel for this operation
@@ -115,5 +116,5 @@ private:
std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr};
CLReshapeLayer _reshape;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h
new file mode 100644
index 000000000..3023df3f0
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLSplitVEx.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSPLITVEX__
+#define __ARM_COMPUTE_CLSPLITVEX__
+
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+#include "arm_compute/core/Types.h"
+#include <vector>
+#include <memory>
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/runtime/CPP/functions/CPPSplit.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to split a tensor into variable-sized outputs, implemented via @ref CLSlice */
+class CLSplitVEx : public IFunction
+{
+public:
+ /** Default constructor */
+ CLSplitVEx();
+ /** Configure the split CL kernel
+ *
+ * @param[in] input The input tensor to split. Data types supported:
+ * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] size_splits A 1-D tensor containing the number of tensor values per split
+ * @param[out] outputs A vector containing the output tensors. Data types supported: Same as @p
+ * input
+ * The output tensors should match the input tensor dimensions for all
+ * shape dimensions apart
+ * from the split dimension.
+ * @param[in] split_dim Integer value representing the input tensor dimension along which to
+ * split
+ * @param[in] num_splits Number of splits
+ */
+ void configure(const ICLTensor *input, const ICLTensor *size_splits, uint32_t split_dim,
+ const std::vector<ICLTensor *> &outputs, unsigned int num_splits);
+
+ void run() override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_size_splits;
+ std::vector<ICLTensor *> _outputs;
+ unsigned int _num_splits;
+ std::vector<CLSlice> _slice_functions;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPLITVEX__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
index e301a5152..f426a4d75 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
@@ -160,5 +160,5 @@ private:
CLTopKV2Store _store_kernel;
#endif
};
-}
+} // namespace arm_compute
#endif // __ARM_COMPUTE_CLTOPK_V2_H__
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
index 5fb102e47..5b27d362a 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
@@ -63,20 +63,22 @@ public:
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same
- * as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the
- * @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this
- * is described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
@@ -85,22 +87,22 @@ public:
/** Set the input, weights, biases and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
- * an optional 4th dimension for batch of inputs. Data types supported:
- * QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as
- * the @p input.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
* @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
- * this is described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
@@ -108,22 +110,24 @@ public:
unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayer
+ * CLTransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as
- * @p input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the
- * @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is
- * described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with
+ * @ref CLWeightsReshapeKernel.
*
* @return a status
*/
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 3fad230f1..d0ddc2609 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -16,13 +16,13 @@
#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
-#include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
+#include <arm_compute/runtime/NEON/functions/NECastBool.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
#include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h>
#include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h>
+#include <arm_compute/runtime/NEON/functions/NEOneHot.h>
#include <arm_compute/runtime/NEON/functions/NEReduceSum.h>
#include <arm_compute/runtime/NEON/functions/NEReduceOperation.h>
#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h>
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h
deleted file mode 100644
index 6156c84f8..000000000
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEActivationLayerEx.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__
-#define __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NEActivationLayerKernelEx
- *
- * @note The function simulates an activation layer with the specified activation function.
- */
-class NEActivationLayerEx : public INESimpleFunctionNoBorder
-{
-public:
- /** Constructor
- *
- * @param[in] ctx Runtime context to be used by the function
- */
- NEActivationLayerEx(IRuntimeContext *ctx = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerEx(const NEActivationLayerEx &) = delete;
- /** Default move constructor */
- NEActivationLayerEx(NEActivationLayerEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEActivationLayerEx &operator=(const NEActivationLayerEx &) = delete;
- /** Default move assignment operator */
- NEActivationLayerEx &operator=(NEActivationLayerEx &&) = default;
- /** [NEActivationLayerEx snippet] **/
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr or is equal to the input, the activation function will
- * be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this
- * tensor will store the result
- * of the activation function. Data types supported:
- * QASYMM8/QSYMM16/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] activation_info Activation layer parameters.
- */
- void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info);
- /** [NEActivationLayerEx snippet] **/
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEActivationLayerEx
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor
- * will store the result
- * of the activation function. Data types supported: QASYMM8/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfo &act_info);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEACTIVATIONLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
index 026d30098..8d931f08d 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
@@ -41,8 +41,10 @@
#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+#include "arm_compute/core/Error.h"
#include "arm_compute/core/TypesEx.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/core/ITensorInfo.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h
index 1693922b7..dd62645ee 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReductionOperationEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NECastBool.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,63 +37,41 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#ifndef __ARM_COMPUTE_NECASTBOOL_H__
+#define __ARM_COMPUTE_NECASTBOOL_H__
-#ifndef __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__
-#define __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h"
-#include "arm_compute/core/TypesEx.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
-/** Basic function to simulate a reduction operation. This function calls the following NEON
- * kernels:
- *
- * -# @ref NEFillBorderKernel
- * -# @ref NEReductionOperationKernelEx
- *
+/**
+ * @brief Class to run @ref INESimpleFunctionNoBorder.
*/
-class NEReductionOperationEx : public IFunction
+class NECastBool : public INESimpleFunctionNoBorder
{
public:
- /** Default constructor */
- NEReductionOperationEx();
- /** Set the input and output tensors.
+ /** Initialize the function's source, destination
+ *
+ * Valid conversions Input -> Output :
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] axis Dimension along which to reduce.
- * @param[in] op Reduction operation to perform.
+ * - U8 -> U8, S8, U16, S16, U32, S32, F32, F16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
*/
- void configure(ITensor *input, ITensor *output, unsigned int axis, ReduceOperation op);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEReductionOperationEx.
+ void configure(const ITensor *input, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref NECastBool
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p
- * input.
- * @param[in] axis Dimension along which to reduce.
- * @param[in] op Reduction operation to perform.
+ * @param[in] input Source tensor info. Data types supported: U8.
+ * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis,
- ReduceOperation op);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NEReductionOperationKernelEx _reduction_kernel;
- NEFillBorderKernel _fill_border_kernel;
- size_t _window_split;
- int _reduction_axis;
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};
} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEREDUCTIONOPERATIONEX_H__ */
+#endif /*__ARM_COMPUTE_NECASTBOOL_H__*/
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
index 63f7714aa..82a789e86 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h
@@ -48,12 +48,14 @@
#define __ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
* @brief Class to perform EmbeddingLookup operation
@@ -84,5 +86,5 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *output,
const ITensorInfo *lookups);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEEMBEDDINGLOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
index 56548a479..214592710 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h
@@ -44,11 +44,11 @@
#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
index 8f98f220a..2bbb1fea1 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h
@@ -43,16 +43,16 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+#include "src/core/NEON/kernels/NETransposeKernel.h"
namespace arm_compute
{
@@ -79,11 +79,11 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayerEx(const NEFullyConnectedLayerEx &) = delete;
/** Default move constructor */
- NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = default;
+ NEFullyConnectedLayerEx(NEFullyConnectedLayerEx &&) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEFullyConnectedLayerEx &operator=(const NEFullyConnectedLayerEx &) = delete;
/** Default move assignment operator */
- NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = default;
+ NEFullyConnectedLayerEx &operator=(NEFullyConnectedLayerEx &&) = delete;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
@@ -141,7 +141,7 @@ private:
void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
MemoryGroup _memory_group;
- NEFlattenLayerKernel _flatten_kernel;
+ NEFlattenLayer _flatten_kernel;
NEConvertFullyConnectedWeights _convert_weights;
NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
NEGEMM _mm_gemm;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
index 18cb61bf9..e34b4dcb0 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
@@ -43,8 +43,8 @@ public:
public:
NEFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
- : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
- _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
+ : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
+ _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
index 155a1b837..6944c77f6 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEGatherEx.h
@@ -47,6 +47,7 @@
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to run @ref NEGatherKernelEx */
class NEGatherEx : public INESimpleFunctionNoBorder
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
index 521a05ad9..f6fda60a9 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEHashtableLookup.h
@@ -48,12 +48,14 @@
#define __ARM_COMPUTE_NEHASHTABLELOOKUP_H__
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Error.h"
#include <vector>
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/**
* @brief Class to perform HashtableLookup operation
@@ -96,5 +98,5 @@ public:
const ITensorInfo *input, const ITensorInfo *output,
const ITensorInfo *hits);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEHASHTABLELOOKUP_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
index 18e813923..0ee967698 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h
@@ -54,6 +54,7 @@
namespace arm_compute
{
class ITensor;
+class ITensorInfo;
/** Basic function to perform a Instance normalization.
*
@@ -112,5 +113,5 @@ private:
Tensor _permuted_input;
Tensor _permuted_output;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYEREX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
new file mode 100644
index 000000000..668f024a1
--- /dev/null
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEONEHOT_H__
+#define __ARM_COMPUTE_NEONEHOT_H__
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+
+/** Basic function to run @ref NEOneHotKernel */
+class NEOneHot : public INESimpleFunctionNoBorder
+{
+public:
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up
+ * to 3. Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
+ * U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported:
+ * Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
+ */
+ void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
+ const ITensor *off_value, ITensor *output, int axis = -1);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEOneHotKernel
+ *
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] depth The tensor info for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor info. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor info. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *indices, const ITensorInfo *depth,
+ const ITensorInfo *on_value, const ITensorInfo *off_value,
+ const ITensorInfo *output, int axis = -1);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEONEHOT_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
index 7f764b000..9858e6c09 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
@@ -43,10 +43,10 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/TypesEx.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"
@@ -71,7 +71,7 @@ public:
* @param[in] op Reduce operation to perform.
*/
void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims, ITensor *output,
- ReduceOperation op);
+ ReductionOperation op);
/** Static function to check if given info will lead to a valid configuration of @ref
* NEReduceOperation
@@ -85,14 +85,14 @@ public:
* @return A status
*/
static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output, ReduceOperation op);
+ bool keep_dims, const ITensorInfo *output, ReductionOperation op);
// Inherited methods overridden:
void run() override;
private:
MemoryGroup _memory_group;
- std::vector<NEReductionOperationEx> _reduction_kernels;
+ std::vector<NEReductionOperation> _reduction_kernels;
std::vector<Tensor> _reduced_outs;
NEReshapeLayer _reshape;
unsigned int _reduction_ops;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
index 48b416923..f34a8f8af 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceSum.h
@@ -43,11 +43,13 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
index 24ff5dac9..f82579a45 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -102,47 +102,50 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete;
/** Allow instances of this class to be moved */
- NETransposeConvLayer(NETransposeConvLayer &&) = default;
+ NETransposeConvLayer(NETransposeConvLayer &&) = delete;
/** Allow instances of this class to be moved */
- NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default;
+ NETransposeConvLayer &operator=(NETransposeConvLayer &&) = delete;
/** Default destructor */
virtual ~NETransposeConvLayer() = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
- * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16
- * for F16 input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the @p
- * input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension.
+ * Data type supported: Data types supported: S32 for QASYMM8 and
+ * QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ *                            this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_bottom);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NETransposeConvLayer
+ * NETransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
- * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the @p
- * input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input,
+ * F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ *                            this is described in @ref PadStrideInfo.
+ * @param[in]  invalid_right  The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
* @return a status
*/
@@ -168,5 +171,5 @@ private:
PadStrideInfo _info;
bool _is_prepared;
};
-} // arm_compute
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */