author    Chunseok Lee <chunseok.lee@samsung.com>  2020-12-14 14:43:43 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>  2020-12-14 14:43:43 +0900
commit    62529acabbafce7730601ed01d5709d7bc0d378a (patch)
tree      bf6912cfa8fac4a2997292bfcb3c82055734c97e /compute/ARMComputeEx
parent    6ea13af5257155ff993c205cf997b870cc627f73 (diff)
Imported Upstream version 1.12.0 (tag: upstream/1.12.0)
Diffstat (limited to 'compute/ARMComputeEx')
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h  10
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h  4
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h  4
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h  49
-rw-r--r--  compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h  14
-rw-r--r--  compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h  95
-rw-r--r--  compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h  2
-rw-r--r--  compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h  4
-rw-r--r--  compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h  2
-rw-r--r--  compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h  92
-rw-r--r--  compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h  4
-rw-r--r--  compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h  25
-rw-r--r--  compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h  55
-rw-r--r--  compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp  182
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl  39
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl  4
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl  12
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl  10
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl  12
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h  22
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h  244
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl  20
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl  4
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl  8
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl  2
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl  10
-rw-r--r--  compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl  18
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp  39
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp  34
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp  2
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp  14
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp  18
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp  18
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp  8
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp  18
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp  4
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp  20
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp  18
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp  2
-rw-r--r--  compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp  8
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp  87
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp  20
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp  339
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp  27
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp  134
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp  48
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp  214
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp  48
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp  70
-rw-r--r--  compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp  92
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp  58
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp  58
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp  59
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp  86
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp  5
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp  8
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp  6
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp  16
-rw-r--r--  compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp  14
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp  53
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp  72
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp  2
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp  10
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp  6
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp  8
-rw-r--r--  compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp  58
66 files changed, 1374 insertions, 1374 deletions
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
index d29886a9d..4a3717885 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
@@ -255,14 +255,14 @@ private:
cl::Device _device; /**< Underlying CL device. */
std::string _kernel_path; /**< Path to the kernels folder. */
mutable std::map<std::string, const Program>
- _programs_map; /**< Map with all already loaded program data. */
+ _programs_map; /**< Map with all already loaded program data. */
mutable std::map<std::string, cl::Program>
- _built_programs_map; /**< Map with all already built program data. */
+ _built_programs_map; /**< Map with all already built program data. */
static const std::map<std::string, std::string>
- _kernel_program_map; /**< Map that associates kernel names with programs. */
+ _kernel_program_map; /**< Map that associates kernel names with programs. */
static const std::map<std::string, std::string>
- _program_source_map; /**< Contains sources for all programs.
- Used for compile-time kernel inclusion. >*/
+ _program_source_map; /**< Contains sources for all programs.
+ Used for compile-time kernel inclusion. >*/
};
}
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
index a614d5259..fb689f747 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
@@ -54,8 +54,8 @@ namespace arm_compute
class ICLTensor;
/**
-* @brief Class to perform EmbeddingLookup operation with opencl kernel
-*/
+ * @brief Class to perform EmbeddingLookup operation with opencl kernel
+ */
class CLEmbeddingLookupKernel : public ICLKernel
{
public:
diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
index 99cfa61ec..96f830898 100644
--- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
@@ -55,8 +55,8 @@ namespace arm_compute
class ICLTensor;
/**
-* @brief Class to perform HashtableLookup operation with opencl kernel
-*/
+ * @brief Class to perform HashtableLookup operation with opencl kernel
+ */
class CLHashtableLookupKernel : public ICLKernel
{
public:
diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h
index 99bb351bc..963d7b821 100644
--- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h
+++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h
@@ -68,34 +68,37 @@ public:
const char *name() const override { return "NEOneHotKernel"; }
/** Initialise the kernel's inputs and outputs
*
- * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
- * following types: U32/S32
- * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up to
- * 3. Must be one of the following types: U32/S32
- * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
- * U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: Same
- * as @p on_value
- * @param[out] output Destination tensor. Data type supported: Same as @p on_value
- * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
- * The value must be in range [-indices.rank , indices.rank)
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] depth The tensor for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around.
+ * Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
*/
void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
const ITensor *off_value, ITensor *output, int axis = -1);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NEOneHotKernel
+ * NEOneHotKernel
*
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
- * following types: U32/S32
- * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank:
- * up to 3. Must be one of the following types: U32/S32
- * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported:
- * U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported:
- * Same as @p on_value
- * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
- * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
- * The value must be in range [-indices.rank , indices.rank)
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] depth The tensor info for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor info. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor info. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
*
* @return a status
*/
diff --git a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
index 1e69f0912..2aaab6b3a 100644
--- a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
+++ b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h
@@ -72,10 +72,10 @@ namespace shape_calculator
* @return the calculated shape
*/
inline TensorShape compute_transposeconv_upsampled_shape(
- const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
- std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right,
- unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right,
- unsigned int &pad_top, unsigned int &pad_bottom)
+ const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
+ std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right,
+ unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right,
+ unsigned int &pad_top, unsigned int &pad_bottom)
{
unsigned int sx = info.stride().first;
unsigned int sy = info.stride().second;
@@ -103,7 +103,7 @@ inline TensorShape compute_transposeconv_upsampled_shape(
unsigned int padx_all_except_invallid = padx + info.pad_left() + info.pad_right() - invalid_right;
unsigned int pady_all_except_invallid =
- pady + info.pad_top() + info.pad_bottom() - invalid_bottom;
+ pady + info.pad_top() + info.pad_bottom() - invalid_bottom;
pad_left = (padx_all_except_invallid + 1) / 2 - info.pad_left();
pad_right = pady_all_except_invallid / 2 - info.pad_right() + invalid_right;
pad_top = (padx_all_except_invallid + 1) / 2 - info.pad_top();
@@ -135,7 +135,7 @@ compute_transposeconv_output_shape(const std::pair<unsigned int, unsigned int> &
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int channel_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
TensorShape out_shape{input_shape};
@@ -160,7 +160,7 @@ inline TensorShape compute_depth_to_space_shape_ex(const ITensorInfo *input, int
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int idx_channel =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
TensorShape output_shape{input->tensor_shape()};
output_shape.set(idx_width, input->dimension(idx_width) * block);
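
As a worked example of the depth-to-space shape rule in the last hunk (width and height each scale by block; the matching channel division by block * block is the standard depth-to-space rule and is assumed here, since the hunk ends before that line): an NHWC input of 1x2x2x8 with block 2 becomes 1x4x4x2. A standalone sketch of the arithmetic only; the real helper operates on ITensorInfo and data-layout indices:

    #include <array>
    #include <cassert>

    std::array<unsigned int, 4> depth_to_space_shape_nhwc(std::array<unsigned int, 4> nhwc,
                                                          unsigned int block)
    {
      assert(block >= 1 && nhwc[3] % (block * block) == 0);
      return {nhwc[0], nhwc[1] * block, nhwc[2] * block, nhwc[3] / (block * block)};
    }
    // depth_to_space_shape_nhwc({1, 2, 2, 8}, 2) == {1, 4, 4, 2}
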
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
index 409eaf593..026209f69 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h
@@ -106,22 +106,24 @@ public:
CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension.
- * Data type supported: Should match @p input data type, except for
- * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[out] output Output tensor. The output has the same number of dimensions as the
- * @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this
- * is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type,
+ * except for input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
+ * @param[out] output Output tensor.
+ * The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with
+ * @ref CLWeightsReshapeKernel.
*
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
@@ -130,23 +132,24 @@ public:
/** Set the input, weights, biases and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
- * an optional 4th dimension for batch of inputs.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
* Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension.
* Data type supported: Should match @p input data type, except for
- * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
* @param[out] output Output tensor. The output has the same number of dimensions as
- * the @p input.
+ * the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution,
- * this is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * this is decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
@@ -154,24 +157,26 @@ public:
unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLDirectTransposeConvLayer
+ * CLDirectTransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension.
- * Data type supported: Should match @p input data type, except for input
- * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[in] output Output tensor info. The output has the same number of dimensions as the
- * @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type,
+ * except for input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped
+ * with @ref CLWeightsReshapeKernel.
*
* @return a status
*/
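
For context, a minimal configure-and-run sketch for the first overload documented above. Scheduler setup, shapes, and stride values are illustrative, not taken from this diff; with stride 2, 3x3 weights, and no padding, an 8x8 input maps to a 17x17 output:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"

    using namespace arm_compute;

    void run_direct_transposeconv_example()
    {
      CLScheduler::get().default_init();

      CLTensor input, weights, bias, output;
      input.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));
      weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32));
      bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(17U, 17U, 32U), 1, DataType::F32));

      CLDirectTransposeConvLayer deconv;
      deconv.configure(&input, &weights, &bias, &output, PadStrideInfo(2, 2, 0, 0),
                       /*invalid_right=*/0, /*invalid_bottom=*/0);

      for (CLTensor *t : {&input, &weights, &bias, &output})
        t->allocator()->allocate();
      deconv.run();
      CLScheduler::get().sync();
    }
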
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
index e65a646dc..f27e9913e 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h
@@ -216,7 +216,7 @@ private:
CLConvertFullyConnectedWeights _convert_weights;
weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed;
weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged
- _reshape_weights_managed_function;
+ _reshape_weights_managed_function;
CLFlattenLayer _flatten_layer;
CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function;
CLGEMM _mm_gemm;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
index 289ab167f..bdb168664 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h
@@ -43,8 +43,8 @@ public:
public:
CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
- : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
- _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
+ _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
index b01ec4255..167554c9e 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h
@@ -66,7 +66,7 @@ public:
* @param[out] output The output tensor, Data types supported: same as @p input.
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
* @return N/A
- */
+ */
void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
/**
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
index 5fb102e47..5b27d362a 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
@@ -63,20 +63,22 @@ public:
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same
- * as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the
- * @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this
- * is described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
@@ -85,22 +87,22 @@ public:
/** Set the input, weights, biases and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
- * an optional 4th dimension for batch of inputs. Data types supported:
- * QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as
- * the @p input.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
* @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
- * this is described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
@@ -108,22 +110,24 @@ public:
unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayer
+ * CLTransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as
- * @p input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the
- * @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is
- * described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with
+ * @ref CLWeightsReshapeKernel.
*
* @return a status
*/
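
The validate() documented above lets callers check a configuration without touching device memory. A minimal sketch per the parameter list above; the deconv_info here is an illustrative stride-2, zero-padding setup:

    #include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"

    using namespace arm_compute;

    Status check_transposeconv(const ITensorInfo *input, const ITensorInfo *weights,
                               const ITensorInfo *bias, ITensorInfo *output)
    {
      return CLTransposeConvLayer::validate(input, weights, bias, output,
                                            PadStrideInfo(2, 2, 0, 0),
                                            /*invalid_right=*/0, /*invalid_bottom=*/0);
    }
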
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
index 18cb61bf9..e34b4dcb0 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h
@@ -43,8 +43,8 @@ public:
public:
NEFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
- : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
- _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
+ : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
+ _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
index b2ea6270f..1a68f801a 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h
@@ -66,19 +66,20 @@ public:
void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
const ITensor *off_value, ITensor *output, int axis = -1);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NEOneHotKernel
+ * NEOneHotKernel
*
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
- * following types: U32/S32
- * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank:
- * up to 3. Must be one of the following types: U32/S32
- * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported:
- * U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported:
- * Same as @p on_value
- * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
- * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
- * The value must be in range [-indices.rank , indices.rank)
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] depth The tensor info for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor info. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor info. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
*
* @return a status
*/
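
And a matching configure-and-run sketch for the NEOneHot function documented above (tensor shapes are illustrative; with depth 8 and four S32 indices, the default axis of -1 produces an 8x4 F32 output):

    #include "arm_compute/runtime/NEON/functions/NEOneHot.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void run_one_hot_example()
    {
      Tensor indices, depth, on_value, off_value, output;
      indices.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::S32));
      depth.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::S32));
      on_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
      off_value.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::F32));
      output.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));

      NEOneHot one_hot;
      one_hot.configure(&indices, &depth, &on_value, &off_value, &output, /*axis=*/-1);

      for (Tensor *t : {&indices, &depth, &on_value, &off_value, &output})
        t->allocator()->allocate();
      // Fill indices, depth, on_value and off_value here, then:
      one_hot.run();
    }
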
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
index 24ff5dac9..7a08dae97 100644
--- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
+++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -110,39 +110,42 @@ public:
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
- * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16
- * for F16 input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the @p
- * input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension.
+ * Data type supported: Data types supported: S32 for QASYMM8 and
+ * QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is decribed in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_bottom);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NETransposeConvLayer
+ * NETransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
- * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the @p
- * input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input,
+ * F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is decribed in @ref PadStrideInfo.
+ * @param[in] innvalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
* @return a status
*/
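
One detail worth calling out in the doc above: the bias data type follows the input, except for quantized inputs, which take an S32 bias. A tiny helper capturing that rule (illustrative, not part of the library):

    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    DataType transposeconv_bias_type(DataType input_type)
    {
      switch (input_type)
      {
        case DataType::QASYMM8:
        case DataType::QASYMM8_SIGNED:
          return DataType::S32; // quantized inputs take 32-bit integer biases
        default:
          return input_type; // F32 bias for F32 input, F16 for F16
      }
    }
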
diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index 81d0cb70f..1a8ff3e71 100644
--- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -54,123 +54,123 @@
using namespace arm_compute;
const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
- // ARMComputeEx kernels
- {"arg_min_max_ex_x", "arg_min_max_ex.cl"},
- {"arg_min_max_ex_y", "arg_min_max_ex.cl"},
- {"arg_min_max_ex_z", "arg_min_max_ex.cl"},
- {"arg_min_max_ex_w", "arg_min_max_ex.cl"},
- {"binary_logical_op", "binary_logical_op.cl"},
- {"cast_bool", "cast.cl"},
- {"embedding_lookup", "embedding_lookup.cl"},
- {"gather_ex", "gather_ex.cl"},
- {"gather_ex_1d", "gather_ex.cl"},
- {"gather_ex_1d_out", "gather_ex.cl"},
- {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
- {"hashtable_lookup", "hashtable_lookup.cl"},
- {"instance_normalization_ex", "instance_normalization_ex.cl"},
- {"multiply_scale_factor", "multiply_scale_factor.cl"},
- {"neg_tensor", "neg_tensor.cl"},
- {"one_hot", "one_hot.cl"},
- {"one_hot_only_on_value", "one_hot.cl"},
- {"quantization_symm8", "quantization_symm8.cl"},
- {"reduce_min_max", "reduce_operation.cl"},
- {"reduce_sum_mean", "reduce_operation.cl"},
- {"topkv2_init", "topkv2.cl"},
- {"topkv2_find_first_negative", "topkv2.cl"},
- {"topkv2_reorder_negatives", "topkv2.cl"},
- {"topkv2_store", "topkv2.cl"},
- {"radixsort_histogram", "topkv2_radixsort.cl"},
- {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
- {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
- {"radixsort_reorder", "topkv2_radixsort.cl"},
- {"topkv2_quicksort", "topkv2_quicksort.cl"},
- {"scale_factor_symm8", "scale_factor.cl"},
+ // ARMComputeEx kernels
+ {"arg_min_max_ex_x", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_y", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_z", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_w", "arg_min_max_ex.cl"},
+ {"binary_logical_op", "binary_logical_op.cl"},
+ {"cast_bool", "cast.cl"},
+ {"embedding_lookup", "embedding_lookup.cl"},
+ {"gather_ex", "gather_ex.cl"},
+ {"gather_ex_1d", "gather_ex.cl"},
+ {"gather_ex_1d_out", "gather_ex.cl"},
+ {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
+ {"hashtable_lookup", "hashtable_lookup.cl"},
+ {"instance_normalization_ex", "instance_normalization_ex.cl"},
+ {"multiply_scale_factor", "multiply_scale_factor.cl"},
+ {"neg_tensor", "neg_tensor.cl"},
+ {"one_hot", "one_hot.cl"},
+ {"one_hot_only_on_value", "one_hot.cl"},
+ {"quantization_symm8", "quantization_symm8.cl"},
+ {"reduce_min_max", "reduce_operation.cl"},
+ {"reduce_sum_mean", "reduce_operation.cl"},
+ {"topkv2_init", "topkv2.cl"},
+ {"topkv2_find_first_negative", "topkv2.cl"},
+ {"topkv2_reorder_negatives", "topkv2.cl"},
+ {"topkv2_store", "topkv2.cl"},
+ {"radixsort_histogram", "topkv2_radixsort.cl"},
+ {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
+ {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
+ {"radixsort_reorder", "topkv2_radixsort.cl"},
+ {"topkv2_quicksort", "topkv2_quicksort.cl"},
+ {"scale_factor_symm8", "scale_factor.cl"},
};
const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
#ifdef EMBEDDED_KERNELS
- {
- "arg_min_max_ex.cl",
+ {
+ "arg_min_max_ex.cl",
#include "./cl_kernels/arg_min_max_ex.clembed"
- },
- {
- "cast.cl",
+ },
+ {
+ "cast.cl",
#include "./cl_kernels/cast.clembed"
- },
- {
- "embedding_lookup.cl",
+ },
+ {
+ "embedding_lookup.cl",
#include "./cl_kernels/embedding_lookup.clembed"
- },
- {
- "gather_ex.cl",
+ },
+ {
+ "gather_ex.cl",
#include "./cl_kernels/gather_ex.clembed"
- },
- {
- "gemmlowp_ex.cl",
+ },
+ {
+ "gemmlowp_ex.cl",
#include "./cl_kernels/gemmlowp_ex.clembed"
- },
- {
- "hashtable_lookup.cl",
+ },
+ {
+ "hashtable_lookup.cl",
#include "./cl_kernels/hashtable_lookup.clembed"
- },
- {
- "helpers.h",
+ },
+ {
+ "helpers.h",
#include "./cl_kernels/helpers.hembed"
- },
- {
- "helpers_asymm.h",
+ },
+ {
+ "helpers_asymm.h",
#include "./cl_kernels/helpers_asymm.hembed"
- },
- {
- "instance_normalization_ex.cl",
+ },
+ {
+ "instance_normalization_ex.cl",
#include "./cl_kernels/instance_normalization_ex.clembed"
- },
- {
- "binary_logical_op.cl",
+ },
+ {
+ "binary_logical_op.cl",
#include "./cl_kernels/binary_logical_op.clembed"
- },
- {
- "multiply_scale_factor.cl",
+ },
+ {
+ "multiply_scale_factor.cl",
#include "./cl_kernels/multiply_scale_factor.clembed"
- },
- {
- "neg_tensor.cl",
+ },
+ {
+ "neg_tensor.cl",
#include "./cl_kernels/neg_tensor.clembed"
- },
- {
- "one_hot.cl",
+ },
+ {
+ "one_hot.cl",
#include "./cl_kernels/one_hot.clembed"
- },
- {
- "quantization_symm8.cl",
+ },
+ {
+ "quantization_symm8.cl",
#include "./cl_kernels/quantization_symm8.clembed"
- },
- {
- "reduce_operation.cl",
+ },
+ {
+ "reduce_operation.cl",
#include "./cl_kernels/reduce_operation.clembed"
- },
- {
- "scale_factor.cl",
+ },
+ {
+ "scale_factor.cl",
#include "./cl_kernels/scale_factor.clembed"
- },
- {
- "topkv2.cl",
+ },
+ {
+ "topkv2.cl",
#include "./cl_kernels/topkv2.clembed"
- },
- {
- "topkv2_radixsort.cl",
+ },
+ {
+ "topkv2_radixsort.cl",
#include "./cl_kernels/topkv2_radixsort.clembed"
- },
- {
- "topkv2_quicksort.cl",
+ },
+ {
+ "topkv2_quicksort.cl",
#include "./cl_kernels/topkv2_quicksort.clembed"
- },
+ },
#endif /* EMBEDDED_KERNELS */
};
CLKernelLibraryEx::CLKernelLibraryEx()
- : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
+ : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
{
opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the
// CLKernelLibraryEx is built
@@ -337,8 +337,8 @@ size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) con
size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result);
ARM_COMPUTE_ERROR_ON_MSG(
- err != 0,
- "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
+ err != 0,
+ "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
ARM_COMPUTE_UNUSED(err);
return result;
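
Two patterns from this file, sketched for context. First, the kernel-name to program-file lookup that _kernel_program_map backs; the helper below is illustrative, not the class's actual API:

    #include <map>
    #include <stdexcept>
    #include <string>

    std::string program_for_kernel(const std::map<std::string, std::string> &kernel_program_map,
                                   const std::string &kernel_name)
    {
      const auto it = kernel_program_map.find(kernel_name);
      if (it == kernel_program_map.end())
        throw std::runtime_error("Kernel " + kernel_name + " not found in the kernel library");
      return it->second; // e.g. "one_hot" -> "one_hot.cl"
    }

Second, the CL_KERNEL_WORK_GROUP_SIZE query in the last hunk, written against the OpenCL C++ bindings directly (assuming the cl2.hpp header; error handling is simplified):

    #include <CL/cl2.hpp>

    size_t query_max_wg_size(const cl::Kernel &kernel, const cl::Device &device)
    {
      size_t result = 0;
      const cl_int err = kernel.getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE, &result);
      return err == CL_SUCCESS ? result : 0;
    }
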
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl
index 0a014d15c..135cacf59 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl
@@ -119,15 +119,15 @@ inline DATA_TYPE_OUTPUT arg_idx_min(__global const DATA_TYPE *input, const int x
in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel);
res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8));
- idx_sel.s0123 = (in.s0123 < in.s4567) ||
- (in.s0123 == in.s4567 &&
- CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
+ idx_sel.s0123 =
+ (in.s0123 < in.s4567) ||
+ (in.s0123 == in.s4567 && CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123);
res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4));
idx_sel.s01 =
- (in.s01 < in.s23) ||
- (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
+ (in.s01 < in.s23) ||
+ (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
in.s01 = select(in.s23, in.s01, idx_sel.s01);
res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2));
@@ -204,15 +204,15 @@ inline DATA_TYPE_OUTPUT arg_idx_max(__global const DATA_TYPE *input, const int x
in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel);
res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8));
- idx_sel.s0123 = (in.s0123 > in.s4567) ||
- (in.s0123 == in.s4567 &&
- CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
+ idx_sel.s0123 =
+ (in.s0123 > in.s4567) ||
+ (in.s0123 == in.s4567 && CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123);
res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4));
idx_sel.s01 =
- (in.s01 > in.s23) ||
- (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
+ (in.s01 > in.s23) ||
+ (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
in.s01 = select(in.s23, in.s01, idx_sel.s01);
res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2));
@@ -296,22 +296,21 @@ __kernel void arg_min_max_ex_x(IMAGE_DECLARATION(src),
const uint x_idx = get_global_id(0);
const uint y_idx = get_global_id(1);
const __global DATA_TYPE *src_in_row =
- (const __global DATA_TYPE *)(src_ptr + src_offset_first_element_in_bytes +
- y_idx * src_step_y);
+ (const __global DATA_TYPE *)(src_ptr + src_offset_first_element_in_bytes + y_idx * src_step_y);
for (unsigned int y = 0; y < get_local_size(1); ++y)
{
#if defined(ARG_MAX)
#if defined(PREV_OUTPUT)
- local_results[lid] = arg_idx_max_prev_out(
- src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
+ local_results[lid] =
+ arg_idx_max_prev_out(src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
#else // !defined(PREV_OUTPUT)
local_results[lid] = arg_idx_max((__global DATA_TYPE *)offset(&src, 0, y), x_idx);
#endif // defined(PREV_OUTPUT)
#else // defined(ARG_MIN)
#if defined(PREV_OUTPUT)
- local_results[lid] = arg_idx_min_prev_out(
- src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
+ local_results[lid] =
+ arg_idx_min_prev_out(src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
#else // !defined(PREV_OUTPUT)
local_results[lid] = arg_idx_min((__global DATA_TYPE *)offset(&src, 0, y), x_idx);
#endif // defined(PREV_OUTPUT)
@@ -334,12 +333,12 @@ __kernel void arg_min_max_ex_x(IMAGE_DECLARATION(src),
DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]);
#if defined(ARG_MAX)
condition_check3 =
- ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 < tmp1);
+ ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 < tmp1);
local_results[lid] = select(local_results[lid], local_results[lid + i], condition_check3);
#else // defined(ARG_MIN)
local_results[lid] = select(
- local_results[lid], local_results[lid + i],
- ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 > tmp1));
+ local_results[lid], local_results[lid + i],
+ ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 > tmp1));
#endif // defined(ARG_MAX) || defined(ARG_MIN)
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -403,7 +402,7 @@ __kernel void arg_min_max_ex_y(IMAGE_DECLARATION(src), IMAGE_DECLARATION(output)
{
VEC_DATA_TYPE(DATA_TYPE, 16)
in =
- CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE, 16));
+ CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE, 16));
VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16));
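
For orientation: the reformatted selects above implement a vectorized halving reduction whose tie-break prefers the lower index, so it computes the same answer as this scalar scan (illustrative C++, not the kernel itself; the argmax variant is identical with '>' in place of '<'):

    #include <cstddef>
    #include <vector>

    std::size_t arg_min_lowest_index(const std::vector<float> &v)
    {
      std::size_t best = 0;
      for (std::size_t i = 1; i < v.size(); ++i)
        if (v[i] < v[best]) // strict '<' keeps the lower index on ties
          best = i;
      return best;
    }
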
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
index e249663bc..f8b5bbeb8 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
@@ -111,14 +111,14 @@ __kernel void binary_logical_op(TENSOR3D_DECLARATION(input1), TENSOR3D_DECLARATI
#if OP_CODE == 1 // LOGICAL AND
VSTORE(VEC_SIZE)
(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) &&
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
+ VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
0, (__global DATA_TYPE *)output.ptr);
#elif OP_CODE == 2 // LOGICAL OR
VSTORE(VEC_SIZE)
(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) ||
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
+ VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
0, (__global DATA_TYPE *)output.ptr);
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
index 92e5dfbee..5ebc78d23 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
@@ -117,15 +117,15 @@ __kernel void embedding_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION
// lookup ids for based on the tensor dimensions
int lup_id[4] = {0};
- lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0)))
- : get_global_id(0);
- lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1)))
- : get_global_id(1);
+ lup_id[0] =
+ (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) : get_global_id(0);
+ lup_id[1] =
+ (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) : get_global_id(1);
lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2)))
: get_global_id(2) % DEPTH_OUT;
lup_id[3] = (NUM_DIMS == 4)
- ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
- : get_global_id(2) / DEPTH_OUT;
+ ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
+ : get_global_id(2) / DEPTH_OUT;
in.ptr += input_offset_first_element_in_bytes + lup_id[0] * input_step_x +
lup_id[1] * input_step_y + lup_id[2] * input_step_z + lup_id[3] * input_step_w;
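
The reformatted ternaries above pick, per dimension, either a value from the lookup vector (when that dimension is the lookup axis, i.e. NUM_DIMS matches) or the work-item's own coordinate. A scalar sketch of the NUM_DIMS == 2 case (illustrative):

    #include <array>
    #include <vector>

    // With NUM_DIMS == 2, dimension 1 is looked up; dimension 0 passes through.
    std::array<int, 2> lookup_coords_2d(const std::vector<int> &lups, int gid0, int gid1)
    {
      return {gid0, lups[gid1]};
    }
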
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
index 80ba73d1d..85fc09de4 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
@@ -41,7 +41,7 @@
#include "helpers.h"
#if defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && \
- defined(COLS_A)
+ defined(COLS_A)
#define VECTOR_CHAR VEC_DATA_TYPE(char, NUM_ELEMS_PROCESSED_PER_THREAD_X)
#define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X)
#define VECTOR_FLOAT VEC_DATA_TYPE(float, NUM_ELEMS_PROCESSED_PER_THREAD_X)
@@ -117,7 +117,7 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(
,
uint dst_cross_plane_pad
#endif // REINTERPRET_OUTPUT_AS_3D
- )
+)
{
int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
@@ -208,9 +208,9 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
// Load values from matrix B
VECTOR_CHAR b0 =
- VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
VECTOR_CHAR b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(
- 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y));
+ 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y));
// Accumulate
acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0.s0;
@@ -251,7 +251,7 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
// Load values from matrix B
VECTOR_CHAR b0 =
- VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
// Accumulate
acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0;
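
The accumulate lines above are a widened rank-1 update: one row of B, converted from char to int, is scaled by a scalar from A and added into the int accumulators. The scalar equivalent (illustrative):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    void accumulate_row(std::vector<int32_t> &acc, const std::vector<int8_t> &b, int8_t a)
    {
      for (std::size_t j = 0; j < acc.size(); ++j)
        acc[j] += static_cast<int32_t>(b[j]) * static_cast<int32_t>(a);
    }
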
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
index a4f7dbd48..3ace1fde8 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
@@ -115,15 +115,15 @@ __kernel void hashtable_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION
int lup_id[4] = {0};
- lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0)))
- : get_global_id(0);
- lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1)))
- : get_global_id(1);
+ lup_id[0] =
+ (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) : get_global_id(0);
+ lup_id[1] =
+ (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) : get_global_id(1);
lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2)))
: get_global_id(2) % DEPTH_OUT;
lup_id[3] = (NUM_DIMS == 4)
- ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
- : get_global_id(2) / DEPTH_OUT;
+ ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
+ : get_global_id(2) / DEPTH_OUT;
if (lup_id[NUM_DIMS - 1] < 0)
{
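The third global id packs both the z and w output coordinates; the kernel unpacks them with % DEPTH_OUT and / DEPTH_OUT as above. A sketch, assuming depth_out > 0 (names illustrative):

#include <utility>

// Unpack a flattened work-item id into {z, w} for an output of depth depth_out.
std::pair<int, int> unpack_zw(int gid2, int depth_out)
{
  return {gid2 % depth_out, gid2 / depth_out};
}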
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
index e07a25ec9..4a3bc1369 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
@@ -49,7 +49,7 @@
#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
- defined(cl_arm_integer_dot_product_accumulate_int8)
+ defined(cl_arm_integer_dot_product_accumulate_int8)
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
// defined(cl_arm_integer_dot_product_accumulate_int8)
@@ -288,21 +288,21 @@
#define VECTOR_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \
- uint name##_offset_first_element_in_bytes
+ uint name##_offset_first_element_in_bytes
#define IMAGE_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_offset_first_element_in_bytes
#define TENSOR3D_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
- uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
+ uint name##_offset_first_element_in_bytes
#define TENSOR4D_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
- uint name##_step_w, uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
+ uint name##_step_w, uint name##_offset_first_element_in_bytes
#define CONVERT_TO_VECTOR_STRUCT(name) \
update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
@@ -406,9 +406,9 @@ inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_
uint stride_x, uint step_x)
{
Vector vector = {
- .ptr = ptr,
- .offset_first_element_in_bytes = offset_first_element_in_bytes,
- .stride_x = stride_x,
+ .ptr = ptr,
+ .offset_first_element_in_bytes = offset_first_element_in_bytes,
+ .stride_x = stride_x,
};
vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
return vector;
@@ -436,7 +436,7 @@ inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_el
.stride_x = stride_x,
.stride_y = stride_y};
img.ptr +=
- img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
+ img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
return img;
}
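update_image_workitem_ptr reduces to one line of pointer arithmetic: start at the first element and advance by the work item's (x, y) id times the per-dimension byte step. Host-side sketch (gid_x/gid_y stand in for get_global_id; illustrative):

#include <cstdint>

std::uint8_t *image_workitem_ptr(std::uint8_t *ptr, std::uint32_t offset_first_element_in_bytes,
                                 std::uint32_t step_x, std::uint32_t step_y, std::uint32_t gid_x,
                                 std::uint32_t gid_y)
{
  return ptr + offset_first_element_in_bytes + gid_x * step_x + gid_y * step_y;
}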
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
index 5f1b3f902..d7f1d0814 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
@@ -100,16 +100,16 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return quantized values
*/
-#define QUANTIZE_IMPL(type, size) \
- inline VEC_DATA_TYPE(type, size) \
- quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \
- { \
- VEC_DATA_TYPE(float, size) \
- out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \
- VEC_DATA_TYPE(type, size) \
- res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), \
- VEC_DATA_TYPE(type, size)); \
- return res; \
+#define QUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(type, size) \
+ quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \
+ { \
+ VEC_DATA_TYPE(float, size) \
+ out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \
+ VEC_DATA_TYPE(type, size) \
+ res = \
+ CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), VEC_DATA_TYPE(type, size)); \
+ return res; \
}
/** Dequantize a vector of values to floating-point
@@ -119,11 +119,11 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return dequantized values in floating point
*/
-#define DEQUANTIZE_IMPL(type, size) \
- inline VEC_DATA_TYPE(float, size) \
- dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
- { \
- return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
+#define DEQUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(float, size) \
+ dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
+ { \
+ return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
}
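QUANTIZE_IMPL divides by the scale, adds the zero-point offset, rounds to nearest and saturates; DEQUANTIZE_IMPL inverts it. A scalar uchar sketch of the round trip (assumes the default round-to-nearest FP environment; illustrative, not the kernel itself):

#include <algorithm>
#include <cmath>
#include <cstdint>

std::uint8_t quantize_u8(float x, float offset, float scale)
{
  const long q = std::lrintf(x / scale + offset);            // round to nearest
  return static_cast<std::uint8_t>(std::clamp(q, 0L, 255L)); // saturate
}

float dequantize_u8(std::uint8_t q, float offset, float scale)
{
  return (static_cast<float>(q) - offset) * scale;
}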
/** Correctly-rounded-to-nearest division by a power-of-two.
@@ -134,7 +134,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*/
#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \
inline VEC_DATA_TYPE(int, size) asymm_rounding_divide_by_POW2_##size( \
- VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \
+ VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \
{ \
const VEC_DATA_TYPE(int, size) zero = (VEC_DATA_TYPE(int, size))0; \
const VEC_DATA_TYPE(int, size) one = (VEC_DATA_TYPE(int, size))1; \
@@ -152,32 +152,32 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return Product of two fixed-point numbers.
*/
-#define ASYMM_MULT_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
- { \
- VEC_DATA_TYPE(int, size) \
- overflow = a == b && a == INT_MIN; \
- VEC_DATA_TYPE(long, size) \
- a_64 = convert_long##size(a); \
- VEC_DATA_TYPE(long, size) \
- b_64 = convert_long##size(b); \
- VEC_DATA_TYPE(long, size) \
- ab_64 = a_64 * b_64; \
- /* Revert COMPMID-907 */ \
- VEC_DATA_TYPE(long, size) \
- mask1 = 1 << 30; \
- VEC_DATA_TYPE(long, size) \
- mask2 = 1 - (1 << 30); \
- VEC_DATA_TYPE(long, size) \
- is_positive_or_zero = ab_64 >= 0; \
- VEC_DATA_TYPE(long, size) \
- nudge = select(mask2, mask1, is_positive_or_zero); \
- VEC_DATA_TYPE(long, size) \
- mask = 1ll << 31; \
- VEC_DATA_TYPE(int, size) \
- ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
- return select(ab_x2_high32, INT_MAX, overflow); \
+#define ASYMM_MULT_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
+ { \
+ VEC_DATA_TYPE(int, size) \
+ overflow = a == b && a == INT_MIN; \
+ VEC_DATA_TYPE(long, size) \
+ a_64 = convert_long##size(a); \
+ VEC_DATA_TYPE(long, size) \
+ b_64 = convert_long##size(b); \
+ VEC_DATA_TYPE(long, size) \
+ ab_64 = a_64 * b_64; \
+ /* Revert COMPMID-907 */ \
+ VEC_DATA_TYPE(long, size) \
+ mask1 = 1 << 30; \
+ VEC_DATA_TYPE(long, size) \
+ mask2 = 1 - (1 << 30); \
+ VEC_DATA_TYPE(long, size) \
+ is_positive_or_zero = ab_64 >= 0; \
+ VEC_DATA_TYPE(long, size) \
+ nudge = select(mask2, mask1, is_positive_or_zero); \
+ VEC_DATA_TYPE(long, size) \
+ mask = 1ll << 31; \
+ VEC_DATA_TYPE(int, size) \
+ ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
+ return select(ab_x2_high32, INT_MAX, overflow); \
}
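ASYMM_MULT is gemmlowp's saturating rounding doubling high multiply: the high 32 bits of 2*a*b, rounded to nearest, with the single overflow case a == b == INT_MIN pinned to INT_MAX. Scalar equivalent of the macro body above:

#include <climits>
#include <cstdint>

std::int32_t saturating_rounding_doubling_high_mul(std::int32_t a, std::int32_t b)
{
  if (a == b && a == INT_MIN)
    return INT_MAX; // the only product that does not fit
  const std::int64_t ab = static_cast<std::int64_t>(a) * b;
  const std::int64_t nudge = ab >= 0 ? (1 << 30) : (1 - (1 << 30)); // round to nearest
  return static_cast<std::int32_t>((ab + nudge) / (1LL << 31));
}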
/** Calculates \f$ exp(x) \f$ for x in [-1/4, 0).
@@ -186,32 +186,32 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return Result in fixed-point format Q0.
*/
-#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \
- a) \
- { \
- const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \
- const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \
- const int k_fractional_bits = 31; \
- VEC_DATA_TYPE(int, size) \
- x = a + (1 << (k_fractional_bits - 3)); \
- VEC_DATA_TYPE(int, size) \
- x2 = ASYMM_MULT(x, x, size); \
- VEC_DATA_TYPE(int, size) \
- x3 = ASYMM_MULT(x2, x, size); \
- VEC_DATA_TYPE(int, size) \
- x4 = ASYMM_MULT(x2, x2, size); \
- VEC_DATA_TYPE(int, size) \
- x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \
- VEC_DATA_TYPE(int, size) \
- x4_over_24_plus_x3_over_6_plus_x2 = \
- ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \
- VEC_DATA_TYPE(int, size) \
- x4_over_24_plus_x3_over_6_plus_x2_over_2 = \
- ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \
- return constant_term + \
- ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \
+#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \
+ a) \
+ { \
+ const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \
+ const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \
+ const int k_fractional_bits = 31; \
+ VEC_DATA_TYPE(int, size) \
+ x = a + (1 << (k_fractional_bits - 3)); \
+ VEC_DATA_TYPE(int, size) \
+ x2 = ASYMM_MULT(x, x, size); \
+ VEC_DATA_TYPE(int, size) \
+ x3 = ASYMM_MULT(x2, x, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4 = ASYMM_MULT(x2, x2, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_24_plus_x3_over_6_plus_x2 = \
+ ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_24_plus_x3_over_6_plus_x2_over_2 = \
+ ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \
+ return constant_term + \
+ ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \
}
/** Each bit of the result is set to the corresponding bit of either then_val or
@@ -263,15 +263,15 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
#define EXP_BARREL_SHIFTER_IMPL(size) \
inline VEC_DATA_TYPE(int, size) exp_barrel_shifter##size( \
- VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \
- int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \
+ VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \
+ int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \
{ \
if (k_integer_bits > exponent) \
{ \
const int k_shift_amount = k_integer_bits > exponent ? k_fractional_bits + exponent : 0; \
return ASYMM_SELECT_USING_MASK( \
- ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \
- ASYMM_MULT(result, fp_multiplier, size), result, size); \
+ ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \
+ ASYMM_MULT(result, fp_multiplier, size), result, size); \
} \
\
return result; \
@@ -285,7 +285,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*/
#define ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(size) \
inline VEC_DATA_TYPE(int, size) \
- asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \
+ asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \
{ \
const int k_fractional_bits = 31 - k_integer_bits; \
VEC_DATA_TYPE(int, size) \
@@ -298,7 +298,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
a_mod_quarter_minus_one_quarter_scaled = a_mod_quarter_minus_one_quarter << k_integer_bits; \
VEC_DATA_TYPE(int, size) \
result = ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL( \
- a_mod_quarter_minus_one_quarter_scaled, size); \
+ a_mod_quarter_minus_one_quarter_scaled, size); \
VEC_DATA_TYPE(int, size) \
remainder = a_mod_quarter_minus_one_quarter - a; \
\
@@ -312,10 +312,10 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
remainder, size); \
result = EXP_BARREL_SHIFTER(result, +2, 39332535, k_integer_bits, k_fractional_bits, \
remainder, size); \
- result = EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, \
- size); \
result = \
- EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \
+ EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, size); \
+ result = \
+ EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \
\
if (k_integer_bits > 5) \
{ \
@@ -335,27 +335,27 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return Arithmetic left or right shift.
*/
-#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \
- { \
- if (exponent < 0) \
- { \
- return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \
- } \
- \
- const VEC_DATA_TYPE(int, size) min = INT_MIN; \
- const VEC_DATA_TYPE(int, size) max = INT_MAX; \
- int threshold = ((1 << (31 - exponent)) - 1); \
- VEC_DATA_TYPE(int, size) \
- positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \
- VEC_DATA_TYPE(int, size) \
- negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \
- VEC_DATA_TYPE(int, size) \
- result = x << exponent; \
- result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \
- result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \
- return result; \
+#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \
+ { \
+ if (exponent < 0) \
+ { \
+ return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \
+ } \
+ \
+ const VEC_DATA_TYPE(int, size) min = INT_MIN; \
+ const VEC_DATA_TYPE(int, size) max = INT_MAX; \
+ int threshold = ((1 << (31 - exponent)) - 1); \
+ VEC_DATA_TYPE(int, size) \
+ positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \
+ VEC_DATA_TYPE(int, size) \
+ negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \
+ VEC_DATA_TYPE(int, size) \
+ result = x << exponent; \
+ result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \
+ result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \
+ return result; \
}
/** Calculates (a+b)/2, rounded to the nearest integer.
@@ -365,21 +365,21 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*
* @return (a+b)/2, rounded to the nearest integer.
*/
-#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
- { \
- VEC_DATA_TYPE(long, size) \
- a64 = convert_long##size(a); \
- VEC_DATA_TYPE(long, size) \
- b64 = convert_long##size(b); \
- VEC_DATA_TYPE(long, size) \
- sum = a64 + b64; \
- const VEC_DATA_TYPE(long, size) one = 1; \
- const VEC_DATA_TYPE(long, size) minus_one = -1; \
- VEC_DATA_TYPE(long, size) \
- sign = select(minus_one, one, sum >= 0); \
- return convert_int##size((sum + sign) / 2); \
+#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
+ { \
+ VEC_DATA_TYPE(long, size) \
+ a64 = convert_long##size(a); \
+ VEC_DATA_TYPE(long, size) \
+ b64 = convert_long##size(b); \
+ VEC_DATA_TYPE(long, size) \
+ sum = a64 + b64; \
+ const VEC_DATA_TYPE(long, size) one = 1; \
+ const VEC_DATA_TYPE(long, size) minus_one = -1; \
+ VEC_DATA_TYPE(long, size) \
+ sign = select(minus_one, one, sum >= 0); \
+ return convert_int##size((sum + sign) / 2); \
}
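The macro above computes (a+b)/2 with round-half-away-from-zero, widening to 64 bits so the sum cannot overflow. Scalar equivalent:

#include <cstdint>

std::int32_t rounding_half_sum(std::int32_t a, std::int32_t b)
{
  const std::int64_t sum = static_cast<std::int64_t>(a) + b;
  const std::int64_t sign = sum >= 0 ? 1 : -1;
  return static_cast<std::int32_t>((sum + sign) / 2); // truncation gives round-half-away
}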
/** Calculates \f$ 1 / (1 + x) \f$ for x in (0, 1).
@@ -390,7 +390,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
*/
#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(size) \
inline VEC_DATA_TYPE(int, size) \
- asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \
+ asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \
{ \
const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \
const VEC_DATA_TYPE(int, size) Q2_one = 1 << (31 - 2); \
@@ -462,14 +462,14 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \
asymm_rescale##size(value, src_integer_bits, dst_integer_bits)
-#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \
- { \
- const int left_shift = shift > 0 ? shift : 0; \
- const int right_shift = shift > 0 ? 0 : -shift; \
- return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \
- right_shift, size); \
+#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \
+ { \
+ const int left_shift = shift > 0 ? shift : 0; \
+ const int right_shift = shift > 0 ? 0 : -shift; \
+ return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \
+ right_shift, size); \
}
#define MULTIPLY_BY_QUANTIZED_MULTIPLIER(input, qmul, shift, size) \
multiply_by_quantized_multiplier##size(input, qmul, shift)
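MULTIPLY_BY_QUANTIZED_MULTIPLIER is the standard requantization step: scale an int32 accumulator by a fixed-point multiplier and a power-of-two shift. A scalar sketch, reusing saturating_rounding_doubling_high_mul from the ASYMM_MULT sketch above; the rounding shift follows the usual gemmlowp construction, which the kernel's ASYMM_ROUNDING_DIVIDE_BY_POW2 also implements:

#include <cstdint>

// Rounding arithmetic right shift (round half away from zero).
std::int32_t rounding_divide_by_pow2(std::int32_t x, int exponent)
{
  const std::int32_t mask = (1 << exponent) - 1;
  const std::int32_t remainder = x & mask;
  const std::int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
  return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

std::int32_t multiply_by_quantized_multiplier(std::int32_t input, std::int32_t qmul, int shift)
{
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  return rounding_divide_by_pow2(
    saturating_rounding_doubling_high_mul(input * (1 << left_shift), qmul), right_shift);
}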
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
index 014842680..96a243110 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
@@ -41,7 +41,7 @@
#include "helpers.h"
#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \
- defined(DIM_Y) && defined(DIM_Z)
+ defined(DIM_Y) && defined(DIM_Z)
/** This function normalizes the input 2D tensor across the first dimension with respect to mean and
* standard deviation of the same dimension.
*
@@ -108,14 +108,14 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input),
TENSOR4D_DECLARATION(output)
#endif /* IN_PLACE */
#ifdef GAMMA
- ,
+ ,
VECTOR_DECLARATION(gamma)
#endif // GAMMA
#ifdef BETA
- ,
+ ,
VECTOR_DECLARATION(beta)
#endif // BETA
- )
+)
{
Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
#ifndef IN_PLACE
@@ -213,12 +213,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input),
for (int i_h = 0; i_h < DIM_Z; ++i_h)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch);
#endif /* IN_PLACE */
*(output_address) = (*(input_address)-mean) * multip + beta;
}
@@ -231,12 +231,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input),
for (; x <= (DIM_X - VEC_SIZE); x += VEC_SIZE)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
#endif /* IN_PLACE */
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
@@ -251,12 +251,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input),
for (; x < DIM_X; ++x)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
#endif /* IN_PLACE */
*(output_address) = (*(input_address)-mean) * multip + beta;
}
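Every element update in this kernel is the same affine transform: subtract the per-channel mean, scale by gamma/sqrt(var + epsilon) (folded into multip), add beta. Scalar sketch, assuming mean and var have already been reduced over the spatial dimensions:

#include <cmath>

float instance_norm_elem(float x, float mean, float var, float gamma, float beta, float epsilon)
{
  const float multip = gamma / std::sqrt(var + epsilon);
  return (x - mean) * multip + beta;
}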
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
index 3943fc4c2..abbfbd275 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
@@ -114,8 +114,8 @@ __kernel void multiply_scale_factor(IMAGE_DECLARATION(input), VECTOR_DECLARATION
(val, 0, (__global DATA_TYPE *)output.ptr);
#else // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
*((__global DATA_TYPE *)(output.ptr)) =
- ((DATA_TYPE)(*((__global int *)(input.ptr)))) *
- *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier);
+ ((DATA_TYPE)(*((__global int *)(input.ptr)))) *
+ *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier);
#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}
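The fallback path above is a per-row dequantize: each int32 input is scaled by its row's scale factor and a global multiplier. Scalar sketch (illustrative signature):

float multiply_scale_factor(int input, const float *scale, int row, float multiplier)
{
  return static_cast<float>(input) * scale[row] * multiplier;
}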
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl
index c274aba62..784a8d6aa 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl
@@ -206,16 +206,16 @@ __kernel void one_hot_only_on_value(TENSOR3D_DECLARATION(indices), VECTOR_DECLAR
#if AXIS == 0
*(__global DATA_TYPE *)tensor4D_offset(&output, index, px, py, pz) =
- *((__global const DATA_TYPE *)on_value_ptr);
+ *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 1
*(__global DATA_TYPE *)tensor4D_offset(&output, px, index, py, pz) =
- *((__global const DATA_TYPE *)on_value_ptr);
+ *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 2
*(__global DATA_TYPE *)tensor4D_offset(&output, px, py, index, pz) =
- *((__global const DATA_TYPE *)on_value_ptr);
+ *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 3
*(__global DATA_TYPE *)tensor4D_offset(&output, px, py, pz, index) =
- *((__global const DATA_TYPE *)on_value_ptr);
+ *((__global const DATA_TYPE *)on_value_ptr);
#endif // AXIS
}
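one_hot_only_on_value assumes the output was pre-filled with off_value and writes on_value only at the positions the indices select, with the index inserted at dimension AXIS. A sketch of the AXIS == 0 case for rank-1 indices (the depth x N row-major layout here is an assumption for illustration):

#include <cstddef>
#include <vector>

void one_hot_axis0(const std::vector<int> &indices, int depth, float on_value,
                   std::vector<float> &output) // depth x indices.size(), pre-filled with off_value
{
  for (std::size_t i = 0; i < indices.size(); ++i)
  {
    const int idx = indices[i];
    if (idx >= 0 && idx < depth) // out-of-range indices keep off_value
      output[static_cast<std::size_t>(idx) * indices.size() + i] = on_value;
  }
}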
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
index 76fda9041..532000e9e 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
@@ -138,7 +138,7 @@ __kernel void pixelwise_mul_qasymm8(TENSOR3D_DECLARATION(in1), TENSOR3D_DECLARAT
// Multiply with a multiplier smaller than 1
out_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET);
VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16));
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
index 4ae9adb0b..c829f264d 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
@@ -116,7 +116,7 @@ __kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(sc
// Create scale vector
const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) vscale =
- *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1));
+ *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1));
// Quantize
VEC_DATA_TYPE(int, VEC_SIZE)
@@ -127,10 +127,10 @@ __kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(sc
(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr);
#else //! defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
*((__global DATA_TYPE_OUT *)(output.ptr)) = (DATA_TYPE_OUT)CLAMP(
- CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) /
- (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))),
- int),
- MIN_QUANT_VAL, MAX_QUANT_VAL);
+ CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) /
+ (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))),
+ int),
+ MIN_QUANT_VAL, MAX_QUANT_VAL);
#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}
#endif // defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
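The scalar tail above is symmetric quantization: divide by the per-row scale, round to nearest, clamp. There is no zero-point offset in the symmetric scheme. A sketch, assuming MIN_QUANT_VAL/MAX_QUANT_VAL are the [-127, 127] range commonly used for symmetric int8:

#include <algorithm>
#include <cmath>
#include <cstdint>

std::int8_t quantize_symm8(float x, float scale)
{
  const long q = std::lrintf(x / scale); // round to nearest
  return static_cast<std::int8_t>(std::clamp(q, -127L, 127L));
}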
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
index 832ac1270..d0ef31b20 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
@@ -100,12 +100,14 @@ __kernel void reduce_min_max(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(o
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
int indices[4] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
+ get_global_id(0),
+ get_global_id(1),
+ get_global_id(2) % DEPTH_OUT,
+ get_global_id(2) / DEPTH_OUT,
};
DATA_TYPE value =
- *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
+ *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
for (int i = 1; i < dim; ++i)
{
indices[axis] = i;
@@ -186,16 +188,18 @@ __kernel void reduce_sum_mean(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
int indices[4] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
+ get_global_id(0),
+ get_global_id(1),
+ get_global_id(2) % DEPTH_OUT,
+ get_global_id(2) / DEPTH_OUT,
};
DATA_TYPE sum_value = (DATA_TYPE)0;
for (int i = 0; i < dim; ++i)
{
indices[axis] = i;
- sum_value += *(
- (__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
+ sum_value +=
+ *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
}
#if OP_CODE == 3 // REDUCE_SUM
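Both reduction kernels follow the same loop shape: fix three coordinates, walk the reduction axis, and fold each element into the accumulator (REDUCE_MEAN divides the sum by dim afterwards). Host-side sketch with element strides (illustrative):

#include <cstddef>

float reduce_sum_axis(const float *data, const std::size_t stride[4], int indices[4], int axis,
                      int dim)
{
  float sum = 0.f;
  for (int i = 0; i < dim; ++i)
  {
    indices[axis] = i;
    std::size_t off = 0;
    for (int d = 0; d < 4; ++d)
      off += static_cast<std::size_t>(indices[d]) * stride[d]; // stride in elements
    sum += data[off];
  }
  return sum;
}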
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp
index 047004d5e..45307fad7 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp
@@ -63,10 +63,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *prev_outp
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX &&
- op != ReductionOperation::ARG_IDX_MIN,
+ op != ReductionOperation::ARG_IDX_MIN,
"Only ARG_IDX_MAX and ARG_IDX_MIN are supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions,
"Reduction axis greater than max number of dimensions");
@@ -101,13 +102,13 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input,
output_shape.set(axis, 1);
DataType output_data_type = (prev_output != nullptr) ? (prev_output->data_type()) : DataType::S32;
auto_init_if_empty(*output, input->clone()
- ->set_tensor_shape(output_shape)
- .set_data_type(output_data_type)
- .reset_padding()
- .set_is_resizable(true));
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
- Window win = calculate_max_window((prev_output != nullptr) ? (*prev_output) : (*input),
- Steps(vector_size));
+ Window win =
+ calculate_max_window((prev_output != nullptr) ? (*prev_output) : (*input), Steps(vector_size));
bool window_changed = false;
switch (axis)
@@ -137,15 +138,15 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input,
}
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_tuple(err, win);
}
} // namespace
CLArgMinMaxLayerKernelEx::CLArgMinMaxLayerKernelEx()
- : _input(nullptr), _prev_output(nullptr), _output(nullptr), _reduction_axis(0),
- _op(ReductionOperation::ARG_IDX_MAX)
+ : _input(nullptr), _prev_output(nullptr), _output(nullptr), _reduction_axis(0),
+ _op(ReductionOperation::ARG_IDX_MAX)
{
}
@@ -155,11 +156,11 @@ void CLArgMinMaxLayerKernelEx::configure(const ICLTensor *input, const ICLTensor
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr,
- output->info(), axis, op));
+ validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr,
+ output->info(), axis, op));
auto win_config = validate_and_configure_window(
- input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis,
- op);
+ input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis,
+ op);
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
_input = input;
@@ -213,7 +214,7 @@ void CLArgMinMaxLayerKernelEx::configure(const ICLTensor *input, const ICLTensor
ARM_COMPUTE_ERROR("Not supported");
}
_kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(
- "arg_min_max_ex_" + kernel_axis_name, build_opts.options()));
+ "arg_min_max_ex_" + kernel_axis_name, build_opts.options()));
// Configure kernel window
ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
@@ -225,8 +226,8 @@ Status CLArgMinMaxLayerKernelEx::validate(const ITensorInfo *input, const ITenso
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, prev_output, output, axis, op));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
- input->clone().get(), (prev_output != nullptr) ? prev_output->clone().get() : nullptr,
- output->clone().get(), axis, op)));
+ input->clone().get(), (prev_output != nullptr) ? prev_output->clone().get() : nullptr,
+ output->clone().get(), axis, op)));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
index fbc76f5e1..ffa2c5a67 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
@@ -55,7 +55,7 @@ Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2,
const ITensorInfo *output)
{
const TensorShape &out_shape =
- TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
+ TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8);
@@ -68,15 +68,15 @@ Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2,
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8,
DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
+ detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
+ "Wrong shape for output");
}
return Status{};
}
} // namespace
CLBinaryLogicalOpKernel::CLBinaryLogicalOpKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
+ : _input1(nullptr), _input2(nullptr), _output(nullptr)
{
}
@@ -111,13 +111,13 @@ void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor
build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code)));
build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
+ ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
const ValidRegion &valid_region = broadcast_pair.second;
@@ -130,8 +130,8 @@ void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor
AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
+ update_window_and_padding(win_input2, input2_access) ||
+ update_window_and_padding(win, output_access);
output_access.set_valid_region(win, valid_region);
@@ -151,7 +151,7 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue)
if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
{
can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
+ (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
{
can_collapse = (in_shape1[d] == in_shape2[d]);
@@ -160,13 +160,13 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue)
bool has_collapsed = false;
Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
+ can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
+ : window;
const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
+ has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
+ has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
Window slice = collapsed.first_slice_window_3D();
Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
@@ -189,9 +189,9 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue)
BorderSize CLBinaryLogicalOpKernel::border_size() const
{
const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
+ _output->info()->dimension(0) -
+ std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
+ std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
return BorderSize(0, border, 0, 0);
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp
index 6e0bcde7f..3f2ae357d 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp
@@ -103,7 +103,7 @@ void CLCastBoolKernel::configure(const ICLTensor *input, ICLTensor *output)
// Create kernel
const std::string kernel_name = "cast_bool";
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
// Configure kernel
ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
index 67aaf2db6..e4c617c8d 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
@@ -61,14 +61,14 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
input_access.set_valid_region(win, output->valid_region());
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_pair(err, win);
}
} // namespace
CLEmbeddingLookupKernel::CLEmbeddingLookupKernel()
- : _input(nullptr), _output(nullptr), _lookups(nullptr)
+ : _input(nullptr), _output(nullptr), _lookups(nullptr)
{
}
@@ -77,8 +77,8 @@ Status CLEmbeddingLookupKernel::validate(const ITensorInfo *input, const ITensor
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -108,8 +108,8 @@ void CLEmbeddingLookupKernel::configure(const ICLTensor *input, ICLTensor *outpu
build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
index 3bfe3e407..8b5885225 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
@@ -62,15 +62,15 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in
ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), actual_axis);
+ input->tensor_shape(), indices->tensor_shape(), actual_axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
@@ -86,7 +86,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions()));
std::unique_ptr<ITensorInfo> output_info = input->clone();
output_info->set_tensor_shape(arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), actual_axis));
+ input->tensor_shape(), indices->tensor_shape(), actual_axis));
// Output auto initialization if not yet initialized
auto_init_if_empty((*output), output_info->tensor_shape(), 1, input->data_type());
@@ -100,7 +100,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLGatherExKernel::CLGatherExKernel()
- : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0)
+ : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0)
{
}
@@ -109,11 +109,11 @@ void CLGatherExKernel::configure(const ICLTensor *input, const ICLTensor *indice
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), indices->info(), output->info(), axis));
+ validate_arguments(input->info(), indices->info(), output->info(), axis));
// Configure kernel window
auto win_config =
- validate_and_configure_window(input->info(), indices->info(), output->info(), axis);
+ validate_and_configure_window(input->info(), indices->info(), output->info(), axis);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
_input = input;
@@ -133,7 +133,7 @@ void CLGatherExKernel::configure(const ICLTensor *input, const ICLTensor *indice
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options()));
ICLKernel::configure_internal(win_config.second);
}
@@ -144,7 +144,7 @@ Status CLGatherExKernel::validate(const ITensorInfo *input, const ITensorInfo *i
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
indices->clone().get(),
output->clone().get(), axis)
- .first);
+ .first);
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
index 930e7c944..f0a761b97 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
@@ -61,8 +61,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
input_access.set_valid_region(win, output->valid_region());
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_pair(err, win);
}
} // namespace
@@ -78,8 +78,8 @@ Status CLHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITens
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(hits, 1, DataType::U8, DataType::QASYMM8);
@@ -102,7 +102,7 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
ARM_COMPUTE_ERROR_THROW_ON(
- validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
+ validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
_lookups = lookups;
_keys = keys;
@@ -113,7 +113,7 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso
// Make _lookup_indices tensor
_lookup_indices = support::cpp14::make_unique<CLTensor>();
_lookup_indices->allocator()->init(
- TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
+ TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
_lookup_indices->allocator()->allocate();
// Set kernel build options
@@ -127,8 +127,8 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso
build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
@@ -148,7 +148,7 @@ void CLHashtableLookupKernel::run(const Window &window, cl::CommandQueue &queue)
// Set values of hits
const int32_t *lookups_buf =
- reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer());
+ reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer());
const int32_t *keys_buf = reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_keys)->buffer());
uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer());
int32_t *lookup_indices_buf = reinterpret_cast<int32_t *>(_lookup_indices->buffer());
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
index 61c14d271..dab6480b2 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
@@ -94,8 +94,8 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
} // namespace
CLInstanceNormalizationLayerKernelEx::CLInstanceNormalizationLayerKernelEx()
- : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12),
- _run_in_place(false)
+ : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12),
+ _run_in_place(false)
{
}
@@ -132,7 +132,7 @@ void CLInstanceNormalizationLayerKernelEx::configure(ICLTensor *input, ICLTensor
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options()));
// Configure kernel window
auto win_config = validate_and_configure_window(_input->info(), _output->info());
@@ -147,7 +147,7 @@ Status CLInstanceNormalizationLayerKernelEx::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
- input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
+ input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
index 6b27c9917..1d4b141a7 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
@@ -99,7 +99,7 @@ std::tuple<Status, Window> validate_and_configure_window(const ITensorInfo *inpu
} // namespace
CLMultiplyScaleFactorKernel::CLMultiplyScaleFactorKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
{
}
@@ -108,7 +108,7 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
@@ -123,9 +123,9 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen
Window win = calculate_max_window(*output->info());
if (multi_access_x)
{
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
- vec_size_x));
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
}
ICLKernel::configure_internal(win);
@@ -134,11 +134,11 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option_if(
- multi_access_x, "-DLAST_ACCESSED_X=" +
- support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+ multi_access_x, "-DLAST_ACCESSED_X=" +
+ support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
}
Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input,
@@ -147,7 +147,7 @@ Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
index 643c8b110..ee633d437 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
@@ -80,9 +80,9 @@ void CLNegKernel::configure(const ICLTensor *input, ICLTensor *output)
std::set<std::string> build_opts;
build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts));
// Configure window
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp
index 35d70d689..0b8e7cc41 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp
@@ -65,7 +65,7 @@ inline Status validate_arguments(const ITensorInfo *indices, const ITensorInfo *
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, output);
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex(
- indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
+ indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
return Status{};
@@ -79,7 +79,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *indices,
const uint32_t actual_axis = wrap_around(axis, static_cast<int>(output->num_dimensions()));
// Output auto initialization if not yet initialized
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex(
- indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
+ indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
auto_init_if_empty((*output), output_shape, 1, on_value->data_type());
// Create window
Window win = calculate_max_window(*output, Steps());
@@ -88,8 +88,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *indices,
}
} // namespace
CLOneHotKernel::CLOneHotKernel()
- : _indices(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
- _is_off_value_memset(false)
+ : _indices(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
+ _is_off_value_memset(false)
{
}
void CLOneHotKernel::configure(const ICLTensor *indices, const ICLTensor *on_value,
@@ -114,10 +114,10 @@ void CLOneHotKernel::configure_common(const ICLTensor *indices, const ICLTensor
ICLTensor *output, int depth, int axis)
{
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(indices->info(), on_value->info(), output->info(), depth, axis));
+ validate_arguments(indices->info(), on_value->info(), output->info(), depth, axis));
// Configure kernel window
auto win_config =
- validate_and_configure_window(indices->info(), on_value->info(), output->info(), depth, axis);
+ validate_and_configure_window(indices->info(), on_value->info(), output->info(), depth, axis);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
if (_is_off_value_memset)
{
@@ -131,7 +131,7 @@ void CLOneHotKernel::configure_common(const ICLTensor *indices, const ICLTensor
// Set build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(
- data_size_from_type(on_value->info()->data_type())));
+ data_size_from_type(on_value->info()->data_type())));
build_opts.add_option("-DAXIS=" + support::cpp11::to_string(actual_axis));
build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(depth));
build_opts.add_option("-DOUTPUT_DIM_Z=" +
@@ -139,7 +139,7 @@ void CLOneHotKernel::configure_common(const ICLTensor *indices, const ICLTensor
// Create kernel
const std::string kernel_name = _is_off_value_memset ? "one_hot_only_on_value" : "one_hot";
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
ICLKernel::configure_internal(win_config.second);
}
Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value,
@@ -153,7 +153,7 @@ Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *o
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(),
on_value->clone().get(),
output->clone().get(), depth, axis)
- .first);
+ .first);
return Status{};
}
Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value,
@@ -163,7 +163,7 @@ Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *o
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(),
on_value->clone().get(),
output->clone().get(), depth, axis)
- .first);
+ .first);
return Status{};
}
void CLOneHotKernel::run(const Window &window, cl::CommandQueue &queue)
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
index 1a7a18cfa..b417a7103 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
@@ -87,9 +87,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
if (multi_access_x)
{
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
- vec_size_x));
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
}
Coordinates coord;
@@ -101,7 +101,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLQuantizationSymmetricKernel::CLQuantizationSymmetricKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
{
}
@@ -110,7 +110,7 @@ void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLT
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
@@ -132,11 +132,11 @@ void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLT
build_opts.add_option("-DDATA_TYPE_OUT=" +
get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option_if(
- multi_access_x, "-DLAST_ACCESSED_X=" +
- support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
+ multi_access_x,
+ "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
}
Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input,
@@ -145,7 +145,7 @@ Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+ validate_and_configure_window(input->clone().get(), output->clone().get()).first);
return Status{};
}
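The symmetric quantization touched here maps floats to int8 with a per-slice scale and no zero point. A scalar sketch of the mapping, assuming the scale factor has already been computed (the kernel applies the same arithmetic vectorized along X):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Symmetric int8 quantization sketch: q = clamp(round(x / scale), -127, 127).
int8_t quantize_symm8(float x, float scale)
{
  const int q = static_cast<int>(std::lround(x / scale));
  return static_cast<int8_t>(std::min(127, std::max(-127, q)));
}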
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
index 3fbebf25a..3906009c2 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
@@ -145,7 +145,7 @@ void CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *outpu
// Create kernel
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
// Configure kernel window
Window win = calculate_max_window(*output_info, Steps());
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
index 8d8853c81..4a6374444 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
@@ -94,8 +94,8 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_tuple(err, win);
}
} // namespace
@@ -115,7 +115,7 @@ void CLScaleFactorSymm8Kernel::configure(const ICLTensor *input, ICLTensor *outp
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts));
+ CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts));
auto win_config = validate_and_configure_window(input->info(), output->info());
@@ -128,7 +128,7 @@ Status CLScaleFactorSymm8Kernel::validate(const ITensorInfo *input, const ITenso
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
index dfe5d59b0..c88bef6d7 100644
--- a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
@@ -53,12 +53,12 @@ namespace
using namespace arm_compute;
template <typename InputScalarType, typename OutputScalarType, typename InputVectorType>
void elementwise_op_templ(
- const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
- OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &),
- int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &,
- OutputScalarType *, const bool),
- int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *,
- OutputScalarType *))
+ const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
+ OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &),
+ int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &,
+ OutputScalarType *, const bool),
+ int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *,
+ OutputScalarType *))
{
// Create input windows
Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());
@@ -88,26 +88,26 @@ void elementwise_op_templ(
Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);
Iterator output(out, win);
- execute_window_loop(win,
- [&](const Coordinates &) {
- auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
- const auto non_broadcast_input_ptr =
- reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr());
- const InputScalarType broadcast_value =
- *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr());
-
- int x = (*broadcast_func)(window_start_x, window_end_x, window_step_x,
- non_broadcast_input_ptr, broadcast_value,
- output_ptr, !is_broadcast_input_2);
- for (; x < window_end_x; ++x)
- {
- const auto a = *(non_broadcast_input_ptr + x);
- *(output_ptr + x) =
- (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a,
- !is_broadcast_input_2 ? a : broadcast_value);
- }
- },
- broadcast_input, non_broadcast_input, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
+ const auto non_broadcast_input_ptr =
+ reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr());
+ const InputScalarType broadcast_value =
+ *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr());
+
+ int x =
+ (*broadcast_func)(window_start_x, window_end_x, window_step_x, non_broadcast_input_ptr,
+ broadcast_value, output_ptr, !is_broadcast_input_2);
+ for (; x < window_end_x; ++x)
+ {
+ const auto a = *(non_broadcast_input_ptr + x);
+ *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a,
+ !is_broadcast_input_2 ? a : broadcast_value);
+ }
+ },
+ broadcast_input, non_broadcast_input, output);
}
else
{
@@ -119,24 +119,23 @@ void elementwise_op_templ(
Iterator input2(in2, input2_win);
Iterator output(out, win);
- execute_window_loop(win,
- [&](const Coordinates &) {
- auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
- const auto input1_ptr =
- reinterpret_cast<const InputScalarType *>(input1.ptr());
- const auto input2_ptr =
- reinterpret_cast<const InputScalarType *>(input2.ptr());
-
- int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
- input1_ptr, input2_ptr, output_ptr);
- for (; x < window_end_x; ++x)
- {
- const auto a = *(input1_ptr + x);
- const auto b = *(input2_ptr + x);
- *(output_ptr + x) = (*scalar_func)(a, b);
- }
- },
- input1, input2, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
+ const auto input1_ptr = reinterpret_cast<const InputScalarType *>(input1.ptr());
+ const auto input2_ptr = reinterpret_cast<const InputScalarType *>(input2.ptr());
+
+ int x = (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr,
+ output_ptr);
+ for (; x < window_end_x; ++x)
+ {
+ const auto a = *(input1_ptr + x);
+ const auto b = *(input2_ptr + x);
+ *(output_ptr + x) = (*scalar_func)(a, b);
+ }
+ },
+ input1, input2, output);
}
}
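Every execute_window_loop body reflowed above follows the same shape: a vectorized main loop over whole NEON registers, then a scalar tail for leftover elements. A generic sketch of that pattern (hypothetical helper, intrinsics replaced by callables):

#include <cstddef>

// Vector-body-plus-scalar-tail sketch: process `step` elements per iteration
// with a SIMD-style body, then finish the remainder one element at a time.
template <typename T, typename VecOp, typename ScalarOp>
void elementwise(const T *a, const T *b, T *out, std::size_t n, std::size_t step,
                 VecOp op_vec, ScalarOp op_scalar)
{
  std::size_t x = 0;
  for (; x + step <= n; x += step)
    op_vec(a + x, b + x, out + x); // whole-register body
  for (; x < n; ++x)
    out[x] = op_scalar(a[x], b[x]); // leftover elements
}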
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
index 32d7d6237..a8464afce 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
@@ -103,8 +103,10 @@ template <BinaryLogicalOperation op>
inline uint8x16x4_t elementwise_logic_op(const uint8x16x4_t &a, const uint8x16x4_t &b)
{
uint8x16x4_t out = {{
- elementwise_logic_op<op>(a.val[0], b.val[0]), elementwise_logic_op<op>(a.val[1], b.val[1]),
- elementwise_logic_op<op>(a.val[2], b.val[2]), elementwise_logic_op<op>(a.val[3], b.val[3]),
+ elementwise_logic_op<op>(a.val[0], b.val[0]),
+ elementwise_logic_op<op>(a.val[1], b.val[1]),
+ elementwise_logic_op<op>(a.val[2], b.val[2]),
+ elementwise_logic_op<op>(a.val[3], b.val[3]),
}};
return out;
}
@@ -160,8 +162,8 @@ void elementwise_logic_op(const ITensor *in1, const ITensor *in2, ITensor *out,
}
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> configure_func(
- const ITensor *input1, const ITensor *input2, ITensor *output,
- std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
+ const ITensor *input1, const ITensor *input2, ITensor *output,
+ std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
{
std::string function_to_call("op_");
function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
@@ -184,8 +186,8 @@ std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
configure_logic_func(const ITensor *input1, const ITensor *input2, ITensor *output)
{
static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = {
- {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
- {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
+ {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
+ {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
return configure_func(input1, input2, output, map_function);
}
@@ -223,7 +225,7 @@ Status NEBinaryLogicalOperationKernel::validate_arguments(const ITensorInfo &inp
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
const TensorShape out_shape =
- TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
+ TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
"Inputs are not broadcast compatible");
@@ -232,8 +234,8 @@ Status NEBinaryLogicalOperationKernel::validate_arguments(const ITensorInfo &inp
if (output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
- "Wrong shape for output");
+ detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
+ "Wrong shape for output");
}
return Status{};
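The logical kernels operate on bool-as-uint8 tensors, so AND and OR reduce to plain bitwise operators applied lane-wise; the NEON code above simply applies them to uint8x16_t registers four at a time. A scalar sketch:

#include <cstdint>

enum class BinaryLogicalOperation { AND, OR };

// Scalar analogue of the lane-wise logical op used by the NEON kernel.
uint8_t logic_op(BinaryLogicalOperation op, uint8_t a, uint8_t b)
{
  return op == BinaryLogicalOperation::AND ? static_cast<uint8_t>(a & b)
                                           : static_cast<uint8_t>(a | b);
}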
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp
index 12017e543..f935596e6 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp
@@ -129,125 +129,125 @@ void NECastBoolKernel::run(const Window &window, const ThreadInfo &info)
case DataType::S8:
{
/* Conversion U8 -> S8 */
- execute_window_loop(win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- vst1q_s8(output_ptr + x, vreinterpretq_s8_u8(vandq_u8(
- texels_u8, vdupq_n_u8(true_val))));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ vst1q_s8(output_ptr + x,
+ vreinterpretq_s8_u8(vandq_u8(texels_u8, vdupq_n_u8(true_val))));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
case DataType::S16:
{
/* Up-conversion U8 -> S16 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const int16x8x2_t texels = {
- {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
- vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
-
- vst1q_s16(output_ptr + x, texels.val[0]);
- vst1q_s16(output_ptr + x + 8, texels.val[1]);
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<int32_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+
+ vst1q_s16(output_ptr + x, texels.val[0]);
+ vst1q_s16(output_ptr + x + 8, texels.val[1]);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<int32_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
case DataType::S32:
{
/* Up-conversion U8 -> S32 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int32_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const int16x8x2_t texels = {
- {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
- vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
-
- vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0])));
- vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0])));
- vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1])));
- vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1])));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<uint32_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int32_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+
+ vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0])));
+ vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0])));
+ vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1])));
+ vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1])));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<uint32_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
case DataType::F32:
{
/* Up-conversion U8 -> F32 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<float *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const int16x8x2_t texels = {
- {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
- vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
- vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
- vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
- vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
- vst1q_f32(output_ptr + x + 12,
- vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- auto in = static_cast<uint32_t>(*(input_ptr + x) & true_val);
- *(output_ptr + x) = static_cast<float>(in);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+ vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
+ vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
+ vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
+ vst1q_f32(output_ptr + x + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ auto in = static_cast<uint32_t>(*(input_ptr + x) & true_val);
+ *(output_ptr + x) = static_cast<float>(in);
+ }
+ },
+ input, output);
break;
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -255,86 +255,87 @@ void NECastBoolKernel::run(const Window &window, const ThreadInfo &info)
{
/* Up-conversion U8 -> F16 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const int16x8x2_t texels = {
- {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
- vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
- vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0]));
- vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1]));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+ vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0]));
+ vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1]));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::U8:
{
      /* Conversion U8 -> U8 */
- execute_window_loop(win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- vst1q_u8(output_ptr + x, vandq_u8(texels_u8, vdupq_n_u8(true_val)));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ vst1q_u8(output_ptr + x, vandq_u8(texels_u8, vdupq_n_u8(true_val)));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
case DataType::U16:
{
/* Up-conversion U8 -> U16 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)),
- vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}};
-
- vst1q_u16(output_ptr + x, texels.val[0]);
- vst1q_u16(output_ptr + x + 8, texels.val[1]);
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<uint16_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)),
+ vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}};
+
+ vst1q_u16(output_ptr + x, texels.val[0]);
+ vst1q_u16(output_ptr + x + 8, texels.val[1]);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<uint16_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
default:
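Each branch of the cast kernel does the same thing before widening: mask the stored byte with the true value so any nonzero bool becomes exactly 1, then convert to the destination type. A scalar sketch of the U8 -> S32 case (mask value assumed to be 1, as in the vdupq_n_u8 calls above):

#include <cstdint>

// Bool-cast sketch: clamp the stored byte to {0,1} with a mask, then widen.
int32_t cast_bool_to_s32(uint8_t in)
{
  const uint8_t true_val = 1;
  return static_cast<int32_t>(in & true_val);
}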
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
index 091d38c56..e3a77c6b1 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
@@ -50,7 +50,7 @@
using namespace arm_compute;
NEEmbeddingLookupKernel::NEEmbeddingLookupKernel()
- : _input(nullptr), _lookups(nullptr), _output(nullptr)
+ : _input(nullptr), _lookups(nullptr), _output(nullptr)
{
}
@@ -79,8 +79,8 @@ Status NEEmbeddingLookupKernel::validate(const arm_compute::ITensorInfo *input,
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4);
@@ -119,16 +119,17 @@ void NEEmbeddingLookupKernel::run(const Window &window, const ThreadInfo &info)
{
Iterator output_it(_output, out_slice);
- execute_window_loop(out_slice,
- [&](const Coordinates &id) {
- const int32_t lookup = *reinterpret_cast<int32_t *>(
- _lookups->ptr_to_element(Coordinates{id[lookup_dim]}));
- Coordinates input_id{id};
- input_id.set(lookup_dim, lookup);
- memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
- _output->info()->dimension(0) * _output->info()->element_size());
- },
- output_it);
+ execute_window_loop(
+ out_slice,
+ [&](const Coordinates &id) {
+ const int32_t lookup =
+ *reinterpret_cast<int32_t *>(_lookups->ptr_to_element(Coordinates{id[lookup_dim]}));
+ Coordinates input_id{id};
+ input_id.set(lookup_dim, lookup);
+ memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
+ _output->info()->dimension(0) * _output->info()->element_size());
+ },
+ output_it);
} while (window.slide_window_slice_4D(out_slice));
}
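The embedding lookup body reflowed above is a row gather: each output row is a memcpy of the input row selected by the lookup index. A sketch over a flat row-major float buffer (row_len mirrors dimension(0), with element size folded into the type):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Embedding-lookup sketch: out[i] = table[lookups[i]], copied a whole row at a time.
void embedding_lookup(const float *table, const int32_t *lookups, float *out,
                      std::size_t num_lookups, std::size_t row_len)
{
  for (std::size_t i = 0; i < num_lookups; ++i)
    std::memcpy(out + i * row_len, table + lookups[i] * row_len, row_len * sizeof(float));
}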
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
index 93963a504..c9f0799d4 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
@@ -71,7 +71,7 @@ template <typename U> void validate_indices(const ITensor *indices)
} // namespace
NEGatherKernelEx::NEGatherKernelEx()
- : _input{}, _indices{}, _axis{}, _indices_rank{}, _output{}, _func{}
+ : _input{}, _indices{}, _axis{}, _indices_rank{}, _output{}, _func{}
{
}
@@ -85,36 +85,35 @@ inline void NEGatherKernelEx::gather_0_axis(const Window &window, const ThreadIn
Iterator output_it(_output, window);
execute_window_loop(
- window,
- [&](const Coordinates &id) {
- Coordinates gather_id(id);
- gather_id.collapse(_indices_rank);
-
- U new_index;
- switch (_indices_rank)
- {
- case 1:
- new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0]))));
- break;
- case 2:
- new_index =
- *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1]))));
- break;
- case 3:
- new_index = *(
- reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2]))));
- break;
- default:
- ARM_COMPUTE_ERROR("Wrong num of dimensions");
- break;
- }
-
- gather_id.set(0, new_index);
-
- std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(),
- output_it.ptr());
- },
- output_it);
+ window,
+ [&](const Coordinates &id) {
+ Coordinates gather_id(id);
+ gather_id.collapse(_indices_rank);
+
+ U new_index;
+ switch (_indices_rank)
+ {
+ case 1:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0]))));
+ break;
+ case 2:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1]))));
+ break;
+ case 3:
+ new_index =
+ *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2]))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Wrong num of dimensions");
+ break;
+ }
+
+ gather_id.set(0, new_index);
+
+ std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(),
+ output_it.ptr());
+ },
+ output_it);
}
template <typename U>
@@ -130,37 +129,36 @@ void NEGatherKernelEx::gather_n_axis(const Window &window, const ThreadInfo &inf
Iterator output_it(_output, output_window);
execute_window_loop(
- output_window,
- [&](const Coordinates &id) {
- Coordinates gather_id(id);
- gather_id.collapse(_indices_rank, _axis);
-
- U new_index;
- switch (_indices_rank)
- {
- case 1:
- new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis]))));
- break;
- case 2:
- new_index = *(reinterpret_cast<U *>(
- _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1]))));
- break;
- case 3:
- new_index = *(reinterpret_cast<U *>(
- _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2]))));
- break;
- default:
- ARM_COMPUTE_ERROR("Wrong num of dimensions");
- break;
- }
-
- gather_id.set(_axis, new_index);
-
- std::copy_n(_input->ptr_to_element(gather_id),
- _input->info()->dimension(0) * _output->info()->element_size(),
- output_it.ptr());
- },
- output_it);
+ output_window,
+ [&](const Coordinates &id) {
+ Coordinates gather_id(id);
+ gather_id.collapse(_indices_rank, _axis);
+
+ U new_index;
+ switch (_indices_rank)
+ {
+ case 1:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis]))));
+ break;
+ case 2:
+ new_index = *(
+ reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1]))));
+ break;
+ case 3:
+ new_index = *(reinterpret_cast<U *>(
+ _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2]))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Wrong num of dimensions");
+ break;
+ }
+
+ gather_id.set(_axis, new_index);
+
+ std::copy_n(_input->ptr_to_element(gather_id),
+ _input->info()->dimension(0) * _output->info()->element_size(), output_it.ptr());
+ },
+ output_it);
}
void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, ITensor *output,
@@ -170,8 +168,8 @@ void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, I
ARM_COMPUTE_ERROR_ON(indices->info()->num_dimensions() > 3);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
_input = input;
_indices = indices;
@@ -217,7 +215,7 @@ void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, I
}
// Output auto initialization if not yet initialized
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis);
+ input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis);
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
// Create window
@@ -243,15 +241,15 @@ Status NEGatherKernelEx::validate(const ITensorInfo *input, const ITensorInfo *i
ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast<int32_t>(input->num_dimensions()));
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), axis);
+ input->tensor_shape(), indices->tensor_shape(), axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
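For reference, the shape rule behind compute_gather_shape_ex used in configure and validate above: the input dimension at `axis` is replaced by the entire shape of the indices tensor. A sketch (plain vectors standing in for TensorShape):

#include <cstddef>
#include <vector>

// Gather output-shape sketch: in[0..axis) ++ idx ++ in(axis..].
std::vector<std::size_t> gather_shape(const std::vector<std::size_t> &in,
                                      const std::vector<std::size_t> &idx, std::size_t axis)
{
  std::vector<std::size_t> out(in.begin(), in.begin() + axis);
  out.insert(out.end(), idx.begin(), idx.end());
  out.insert(out.end(), in.begin() + axis + 1, in.end());
  return out;
}

This is why the kernel collapses _indices_rank dimensions of the output coordinate before looking up the new index: those dimensions jointly address one entry of the indices tensor.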
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
index 30787c0a4..52b40e767 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
@@ -57,7 +57,7 @@ constexpr size_t NOT_HIT = 0xFFFFFFFF;
} // namespace
NEHashtableLookupKernel::NEHashtableLookupKernel()
- : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr}
+ : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr}
{
}
@@ -66,7 +66,7 @@ void NEHashtableLookupKernel::configure(const ITensor *lookups, const ITensor *k
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_THROW_ON(
- validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
+ validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
_lookups = lookups;
_keys = keys;
@@ -92,8 +92,8 @@ Status NEHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITens
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32);
@@ -134,8 +134,8 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
const size_t lookup_dim = _output->info()->num_dimensions() - 1;
const int const_0 = _output->info()->data_type() == DataType::QASYMM8
- ? _output->info()->quantization_info().uniform().offset
- : 0;
+ ? _output->info()->quantization_info().uniform().offset
+ : 0;
std::unordered_map<int32_t, size_t> key_index_map;
for (size_t n = 0; n < _keys->info()->dimension(0); ++n)
@@ -174,24 +174,24 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
{
Iterator output_it(_output, out_slice);
- execute_window_loop(out_slice,
- [&](const Coordinates &id) {
- const auto lookup = lookup_indices.at(id[lookup_dim]);
- if (lookup == NOT_HIT)
- {
- memset(output_it.ptr(), const_0,
- _output->info()->dimension(0) * _output->info()->element_size());
- }
- else
- {
- Coordinates input_id{id};
- input_id.set(lookup_dim, lookup);
- memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
- _output->info()->dimension(0) * _output->info()->element_size());
- }
-
- },
- output_it);
+ execute_window_loop(
+ out_slice,
+ [&](const Coordinates &id) {
+ const auto lookup = lookup_indices.at(id[lookup_dim]);
+ if (lookup == NOT_HIT)
+ {
+ memset(output_it.ptr(), const_0,
+ _output->info()->dimension(0) * _output->info()->element_size());
+ }
+ else
+ {
+ Coordinates input_id{id};
+ input_id.set(lookup_dim, lookup);
+ memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
+ _output->info()->dimension(0) * _output->info()->element_size());
+ }
+ },
+ output_it);
} while (window.slide_window_slice_4D(out_slice));
}
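The hashtable lookup builds a key-to-row map once, then per output row either copies the matching input row or fills with a constant on a miss (the kernel uses the QASYMM8 zero point as the fill value and records hits in a separate tensor). A simplified float sketch:

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Hashtable-lookup sketch: hits copy the matching row, misses zero-fill.
void hashtable_lookup(const std::vector<int32_t> &keys, const std::vector<int32_t> &lookups,
                      const float *input, float *out, std::size_t row_len)
{
  std::unordered_map<int32_t, std::size_t> key_to_row;
  for (std::size_t n = 0; n < keys.size(); ++n)
    key_to_row[keys[n]] = n;
  for (std::size_t i = 0; i < lookups.size(); ++i)
  {
    const auto it = key_to_row.find(lookups[i]);
    for (std::size_t j = 0; j < row_len; ++j)
      out[i * row_len + j] = (it == key_to_row.end()) ? 0.0f : input[it->second * row_len + j];
  }
}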
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
index 49adf1462..4dc0f5535 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
@@ -63,7 +63,7 @@ void instance_normalization_nchw(ITensor *input, ITensor *output, ITensor *gamma
{
/** NEON vector tag type. */
using ExactTagType =
- typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
+ typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
// Clear X/Y dimensions on execution window as we handle the planes manually
Window win = window;
@@ -73,107 +73,107 @@ void instance_normalization_nchw(ITensor *input, ITensor *output, ITensor *gamma
constexpr int window_step_x = 16 / sizeof(T);
const unsigned int elements_plane = input->info()->dimension(0) * output->info()->dimension(1);
const auto channel_idx =
- get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
Iterator input_it(input, win);
execute_window_loop(
- win,
- [&](const Coordinates &id) {
- Window win_plane = window;
- win_plane.set(Window::DimX, Window::Dimension(0, 1, 1));
- win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1));
- win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1));
-
- Iterator input_plane_it(input, win_plane);
- Iterator output_plane_it(output, win_plane);
-
- auto sum_h_w = static_cast<T>(0.f);
- auto sum_squares_h_w = static_cast<T>(0.f);
-
- execute_window_loop(
- win_plane,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const T *>(input_plane_it.ptr());
-
- auto vec_sum_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
- auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
-
- // Compute S elements per iteration
- int x = window.x().start();
- for (; x <= (window.x().end() - window_step_x); x += window_step_x)
- {
- auto vec_input_val = wrapper::vloadq(input_ptr + x);
- vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val);
- vec_sum_squares_h_w =
- wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val));
- }
-
- auto vec2_sum_h_w =
- wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w));
- auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w),
- wrapper::vgetlow(vec_sum_squares_h_w));
- for (int i = 0; i < window_step_x / 4; ++i)
- {
- vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w);
- vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w);
- }
- sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0);
- sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0);
-
- // Compute left-over elements
- for (; x < window.x().end(); ++x)
- {
- const auto value = *(input_ptr + x);
- sum_h_w += value;
- sum_squares_h_w += value * value;
- }
- },
- input_plane_it, output_plane_it);
-
- const auto mean_h_w = sum_h_w / elements_plane;
- const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w;
-
- auto gamma_val = 1.0f;
- if (gamma != nullptr)
- {
- gamma_val = *reinterpret_cast<T *>(gamma->ptr_to_element({id[channel_idx]}));
- }
- const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon);
- const auto vec_mean_h_w = wrapper::vdup_n(static_cast<T>(mean_h_w), ExactTagType{});
- const auto vec_multip_h_w = wrapper::vdup_n(static_cast<T>(multip_h_w), ExactTagType{});
- auto beta_val = 0.0f;
- if (beta != nullptr)
- {
- beta_val = *reinterpret_cast<T *>(beta->ptr_to_element({id[channel_idx]}));
- }
- const auto vec_beta = wrapper::vdup_n(static_cast<T>(beta_val), ExactTagType{});
-
- execute_window_loop(
- win_plane,
- [&](const Coordinates &) {
- auto input_ptr = reinterpret_cast<T *>(input_plane_it.ptr());
- auto output_ptr = reinterpret_cast<T *>(output_plane_it.ptr());
-
- // Compute S elements per iteration
- int x = window.x().start();
- auto vec_val = wrapper::vdup_n(static_cast<T>(0.0f), ExactTagType{});
- for (; x <= (window.x().end() - window_step_x); x += window_step_x)
- {
- vec_val = wrapper::vloadq(input_ptr + x);
- vec_val = wrapper::vadd(
- wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta);
- wrapper::vstore(output_ptr + x, vec_val);
- }
-
- // Compute left-over elements
- for (; x < window.x().end(); ++x)
- {
- *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val;
- }
- },
- input_plane_it, output_plane_it);
- },
- input_it);
+ win,
+ [&](const Coordinates &id) {
+ Window win_plane = window;
+ win_plane.set(Window::DimX, Window::Dimension(0, 1, 1));
+ win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1));
+ win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1));
+
+ Iterator input_plane_it(input, win_plane);
+ Iterator output_plane_it(output, win_plane);
+
+ auto sum_h_w = static_cast<T>(0.f);
+ auto sum_squares_h_w = static_cast<T>(0.f);
+
+ execute_window_loop(
+ win_plane,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const T *>(input_plane_it.ptr());
+
+ auto vec_sum_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
+ auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
+
+ // Compute S elements per iteration
+ int x = window.x().start();
+ for (; x <= (window.x().end() - window_step_x); x += window_step_x)
+ {
+ auto vec_input_val = wrapper::vloadq(input_ptr + x);
+ vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val);
+ vec_sum_squares_h_w =
+ wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val));
+ }
+
+ auto vec2_sum_h_w =
+ wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w));
+ auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w),
+ wrapper::vgetlow(vec_sum_squares_h_w));
+ for (int i = 0; i < window_step_x / 4; ++i)
+ {
+ vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w);
+ vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w);
+ }
+ sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0);
+ sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0);
+
+ // Compute left-over elements
+ for (; x < window.x().end(); ++x)
+ {
+ const auto value = *(input_ptr + x);
+ sum_h_w += value;
+ sum_squares_h_w += value * value;
+ }
+ },
+ input_plane_it, output_plane_it);
+
+ const auto mean_h_w = sum_h_w / elements_plane;
+ const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w;
+
+ auto gamma_val = 1.0f;
+ if (gamma != nullptr)
+ {
+ gamma_val = *reinterpret_cast<T *>(gamma->ptr_to_element({id[channel_idx]}));
+ }
+ const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon);
+ const auto vec_mean_h_w = wrapper::vdup_n(static_cast<T>(mean_h_w), ExactTagType{});
+ const auto vec_multip_h_w = wrapper::vdup_n(static_cast<T>(multip_h_w), ExactTagType{});
+ auto beta_val = 0.0f;
+ if (beta != nullptr)
+ {
+ beta_val = *reinterpret_cast<T *>(beta->ptr_to_element({id[channel_idx]}));
+ }
+ const auto vec_beta = wrapper::vdup_n(static_cast<T>(beta_val), ExactTagType{});
+
+ execute_window_loop(
+ win_plane,
+ [&](const Coordinates &) {
+ auto input_ptr = reinterpret_cast<T *>(input_plane_it.ptr());
+ auto output_ptr = reinterpret_cast<T *>(output_plane_it.ptr());
+
+ // Compute S elements per iteration
+ int x = window.x().start();
+ auto vec_val = wrapper::vdup_n(static_cast<T>(0.0f), ExactTagType{});
+ for (; x <= (window.x().end() - window_step_x); x += window_step_x)
+ {
+ vec_val = wrapper::vloadq(input_ptr + x);
+ vec_val = wrapper::vadd(
+ wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta);
+ wrapper::vstore(output_ptr + x, vec_val);
+ }
+
+ // Compute left-over elements
+ for (; x < window.x().end(); ++x)
+ {
+ *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val;
+ }
+ },
+ input_plane_it, output_plane_it);
+ },
+ input_it);
}
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
@@ -199,8 +199,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index(
- input->data_layout(), DataLayoutDimension::CHANNEL)) !=
- gamma->dimension(0),
+ input->data_layout(), DataLayoutDimension::CHANNEL)) !=
+ gamma->dimension(0),
"Gamma's size must be the same as size of input's channel");
}
@@ -208,8 +208,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index(
- input->data_layout(), DataLayoutDimension::CHANNEL)) !=
- beta->dimension(0),
+ input->data_layout(), DataLayoutDimension::CHANNEL)) !=
+ beta->dimension(0),
"Beta's size must be the same as size of input's channel");
}
@@ -234,8 +234,8 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
} // namespace
NEInstanceNormalizationLayerKernelEx::NEInstanceNormalizationLayerKernelEx()
- : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr),
- _epsilon(1e-12)
+ : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr),
+ _epsilon(1e-12)
{
}
@@ -251,7 +251,7 @@ void NEInstanceNormalizationLayerKernelEx::configure(ITensor *input, ITensor *ou
_epsilon = epsilon;
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon));
+ validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon));
if (_input->info()->data_type() == DataType::F32)
{
@@ -282,7 +282,7 @@ Status NEInstanceNormalizationLayerKernelEx::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
- input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
+ input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
return Status{};
}
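The normalization body reindented above computes, per (channel, batch) plane, y = gamma * (x - mean) / sqrt(var + eps) + beta, with mean and variance taken over the H*W plane. A scalar sketch of one plane, matching the sum/sum-of-squares accumulation in the kernel:

#include <cmath>
#include <cstddef>

// Instance-norm sketch for a single H*W plane of n elements.
void instance_norm_plane(const float *x, float *y, std::size_t n, float gamma, float beta,
                         float eps)
{
  float sum = 0.f, sum_sq = 0.f;
  for (std::size_t i = 0; i < n; ++i)
  {
    sum += x[i];
    sum_sq += x[i] * x[i];
  }
  const float mean = sum / n;
  const float var = sum_sq / n - mean * mean;
  const float multip = gamma / std::sqrt(var + eps); // multip_h_w in the kernel
  for (std::size_t i = 0; i < n; ++i)
    y[i] = (x[i] - mean) * multip + beta;
}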
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
index b92130cec..ad4728175 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
@@ -123,15 +123,17 @@ inline float32x4x4_t multiply_scale_vec(const int32x4x4_t &iv, float scale)
const float32x4_t vscale = vdupq_n_f32(scale);
const float32x4x4_t ret = {{
- vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale),
- vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale),
}};
return ret;
}
} // namespace
NEMultiplyScaleFactorKernel::NEMultiplyScaleFactorKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
{
}
@@ -140,7 +142,7 @@ void NEMultiplyScaleFactorKernel::configure(const ITensor *input, const ITensor
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
@@ -180,25 +182,25 @@ template <typename T> void NEMultiplyScaleFactorKernel::multiply(const Window &w
Iterator output(_output, win_collapsed);
win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
execute_window_loop(
- win_collapsed,
- [&](const Coordinates &id) {
- auto scale = *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()}));
- scale *= _multiplier;
-
- const auto input_ptr = reinterpret_cast<const int32_t *>(input.ptr());
- auto output_ptr = reinterpret_cast<T *>(output.ptr());
- int x = window_start_x;
- for (; x <= (window_end_x - window_step); x += window_step)
- {
- store_result<float>(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale));
- }
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- output_ptr[x] = input_ptr[x] * scale;
- }
- },
- input, output);
+ win_collapsed,
+ [&](const Coordinates &id) {
+ auto scale = *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()}));
+ scale *= _multiplier;
+
+ const auto input_ptr = reinterpret_cast<const int32_t *>(input.ptr());
+ auto output_ptr = reinterpret_cast<T *>(output.ptr());
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step); x += window_step)
+ {
+ store_result<float>(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale));
+ }
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ output_ptr[x] = input_ptr[x] * scale;
+ }
+ },
+ input, output);
}
void NEMultiplyScaleFactorKernel::run(const Window &window, const ThreadInfo &info)
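The multiply-scale-factor kernel is the dequantize counterpart of the symmetric quantizer: each int32 row is scaled back to float by that row's scale factor times a global multiplier (read per row at id.y() above). A scalar sketch:

#include <cstddef>
#include <cstdint>

// Dequantize sketch: out[x] = in[x] * (row_scale * multiplier), one scale per row.
void multiply_scale_row(const int32_t *in, float *out, std::size_t n, float row_scale,
                        float multiplier)
{
  const float scale = row_scale * multiplier;
  for (std::size_t x = 0; x < n; ++x)
    out[x] = in[x] * scale;
}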
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp
index 0a11eb509..0daff5c6a 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp
@@ -101,8 +101,8 @@ bool isOnValue(U index, U depth)
} // namespace
NEOneHotKernel::NEOneHotKernel()
- : _indices{nullptr}, _depth{nullptr}, _on_value{nullptr}, _off_value{nullptr}, _axis{-1},
- _output{nullptr}, _func{}
+ : _indices{nullptr}, _depth{nullptr}, _on_value{nullptr},
+ _off_value{nullptr}, _axis{-1}, _output{nullptr}, _func{}
{
}
@@ -117,22 +117,22 @@ void NEOneHotKernel::onehot_0_axis(const Window &window, const ThreadInfo &info)
Iterator output_it(_output, output_window);
const U off_value = *reinterpret_cast<U *>(_off_value->buffer());
execute_window_loop(
- output_window,
- [&](const Coordinates &id) {
- std::fill_n(output_it.ptr(),
- _output->info()->dimension(0) * _output->info()->element_size(), off_value);
- Coordinates indices_id(id);
- indices_id.remove(0);
- const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
- if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
- {
- Coordinates onehot_id(id);
- onehot_id.set(0, new_index);
- std::copy_n(_on_value->buffer(), _output->info()->element_size(),
- _output->ptr_to_element(onehot_id));
- }
- },
- output_it);
+ output_window,
+ [&](const Coordinates &id) {
+ std::fill_n(output_it.ptr(), _output->info()->dimension(0) * _output->info()->element_size(),
+ off_value);
+ Coordinates indices_id(id);
+ indices_id.remove(0);
+ const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
+ if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
+ {
+ Coordinates onehot_id(id);
+ onehot_id.set(0, new_index);
+ std::copy_n(_on_value->buffer(), _output->info()->element_size(),
+ _output->ptr_to_element(onehot_id));
+ }
+ },
+ output_it);
}
template <typename U>
@@ -142,22 +142,22 @@ inline void NEOneHotKernel::onehot_n_axis(const Window &window, const ThreadInfo
// Validate that the indices are not negative
validate_depth<U>(_depth, _output, _axis);
Iterator output_it(_output, window);
- execute_window_loop(window,
- [&](const Coordinates &id) {
- Coordinates indices_id(id);
- indices_id.remove(_axis);
- const U new_index =
- *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
- if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
- {
- Coordinates onehot_id(id);
- onehot_id.set(_axis, new_index);
- std::copy_n(static_cast<U>(id[_axis]) == new_index ? _on_value->buffer()
- : _off_value->buffer(),
- _output->info()->element_size(), output_it.ptr());
- }
- },
- output_it);
+ execute_window_loop(
+ window,
+ [&](const Coordinates &id) {
+ Coordinates indices_id(id);
+ indices_id.remove(_axis);
+ const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
+ if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
+ {
+ Coordinates onehot_id(id);
+ onehot_id.set(_axis, new_index);
+ std::copy_n(static_cast<U>(id[_axis]) == new_index ? _on_value->buffer()
+ : _off_value->buffer(),
+ _output->info()->element_size(), output_it.ptr());
+ }
+ },
+ output_it);
}
void NEOneHotKernel::configure(const ITensor *indices, const ITensor *depth,
@@ -215,7 +215,7 @@ Status NEOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *d
const ITensorInfo *output, int axis)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_arguments(indices, depth, on_value, off_value, output, axis));
+ validate_arguments(indices, depth, on_value, off_value, output, axis));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
index 5841f1d69..2306228d5 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
@@ -107,19 +107,15 @@ inline int8x16_t vquantizeSymm(const float32x4x4_t &fv, float scale_factor_inv,
const int32x4x4_t rf = {{
#ifdef __aarch64__
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
#else //__aarch64__
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
#endif //__aarch64__
}};
const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
@@ -129,7 +125,7 @@ inline int8x16_t vquantizeSymm(const float32x4x4_t &fv, float scale_factor_inv,
} // namespace
NEQuantizationSymmetricKernel::NEQuantizationSymmetricKernel()
- : _input(nullptr), _output(nullptr), _scale_factor(nullptr)
+ : _input(nullptr), _output(nullptr), _scale_factor(nullptr)
{
}
@@ -138,7 +134,7 @@ void NEQuantizationSymmetricKernel::configure(const ITensor *input, ITensor *out
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), output->info(), scale_factor->info()));
+ validate_arguments(input->info(), output->info(), scale_factor->info()));
_input = input;
_output = output;
@@ -182,40 +178,40 @@ template <typename T> void NEQuantizationSymmetricKernel::quantize(const Window
const auto dim_x = _input->info()->dimension(0);
win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
execute_window_loop(
- win_collapsed,
- [&](const Coordinates &id) {
- const auto start = reinterpret_cast<const T *>(input.ptr());
- const auto min_max = std::minmax_element(start, start + dim_x);
- const auto int8_scale = 127;
- auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second));
- if (range == 0)
- {
- *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = 1;
- range = 1;
- }
- else
- {
- *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale;
- }
- const auto scale_factor_inv = int8_scale / range;
-
- auto input_ptr = reinterpret_cast<const T *>(input.ptr());
- auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
- int x = window_start_x;
- for (; x <= (window_end_x - window_step); x += window_step)
- {
- wrapper::vstore(&output_ptr[x],
- vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale));
- }
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy);
- quantized = std::min(int8_scale, std::max(quantized, -int8_scale));
- output_ptr[x] = static_cast<int8_t>(quantized);
- }
- },
- input, output);
+ win_collapsed,
+ [&](const Coordinates &id) {
+ const auto start = reinterpret_cast<const T *>(input.ptr());
+ const auto min_max = std::minmax_element(start, start + dim_x);
+ const auto int8_scale = 127;
+ auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second));
+ if (range == 0)
+ {
+ *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = 1;
+ range = 1;
+ }
+ else
+ {
+ *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale;
+ }
+ const auto scale_factor_inv = int8_scale / range;
+
+ auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step); x += window_step)
+ {
+ wrapper::vstore(&output_ptr[x],
+ vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale));
+ }
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy);
+ quantized = std::min(int8_scale, std::max(quantized, -int8_scale));
+ output_ptr[x] = static_cast<int8_t>(quantized);
+ }
+ },
+ input, output);
}
void NEQuantizationSymmetricKernel::run(const Window &window, const ThreadInfo &info)
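For reference, the per-row symmetric quantization that quantize() implements above reduces to the following standalone sketch (illustrative helper, not part of the patch; the kernel's configurable rounding policy is simplified here to round-to-nearest):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>

// Quantizes one row of n floats to int8 with a single symmetric scale.
// The scale is chosen so the largest magnitude maps to +/-127.
void quantize_row_symm(const float *in, std::int8_t *out, std::size_t n, float &scale)
{
  float range = 0.f;
  for (std::size_t i = 0; i < n; ++i)
    range = std::max(range, std::abs(in[i]));
  if (range == 0.f) // all-zero row: keep the scale factor well-defined
  {
    scale = 1.f;
    range = 1.f;
  }
  else
  {
    scale = range / 127.f;
  }
  const float inv = 127.f / range;
  for (std::size_t i = 0; i < n; ++i)
  {
    int q = static_cast<int>(std::lround(in[i] * inv));
    q = std::min(127, std::max(q, -127)); // clamp, mirroring the leftover loop above
    out[i] = static_cast<std::int8_t>(q);
  }
}

int main()
{
  const float row[4] = {0.5f, -2.0f, 1.25f, 0.0f};
  std::int8_t q[4];
  float scale = 0.f;
  quantize_row_symm(row, q, 4, scale); // scale == 2.0f / 127, q[1] == -127
}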
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp
index 267228eac..b02a48ef2 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp
@@ -50,8 +50,8 @@
namespace arm_compute
{
CLArgMinMaxLayerEx::CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(),
- _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis()
+ : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(),
+ _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis()
{
}
@@ -60,13 +60,13 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX &&
- op != ReductionOperation::ARG_IDX_MIN,
+ op != ReductionOperation::ARG_IDX_MIN,
"Invalid reduction operation");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast<int>(TensorShape::num_max_dimensions),
"Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
const unsigned int num_of_stages =
- calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
+ calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
DataType output_data_type = DataType::S32;
TensorInfo not_reshaped_output;
@@ -76,9 +76,9 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT
if (output->total_size() != 0)
{
output_data_type = output->data_type();
- const TensorInfo expected_output_shape = output->clone()->set_tensor_shape(
- arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis,
- false));
+ const TensorInfo expected_output_shape =
+ output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape(
+ input->tensor_shape(), axis, false));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output);
}
@@ -87,9 +87,9 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT
auto initialize_tensorinfo = [](TensorInfo &ti, TensorShape shape, DataType data_type,
int num_channels, QuantizationInfo qinfo) {
ti.set_data_type(data_type)
- .set_tensor_shape(shape)
- .set_num_channels(num_channels)
- .set_quantization_info(qinfo);
+ .set_tensor_shape(shape)
+ .set_num_channels(num_channels)
+ .set_quantization_info(qinfo);
};
initialize_tensorinfo(not_reshaped_output, shape_before_reshape, output_data_type,
@@ -98,7 +98,7 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT
if (num_of_stages == 1)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLArgMinMaxLayerKernelEx::validate(input, nullptr, &not_reshaped_output, axis, op));
+ CLArgMinMaxLayerKernelEx::validate(input, nullptr, &not_reshaped_output, axis, op));
}
else
{
@@ -118,19 +118,19 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT
// Validate ReductionOperation only on first kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op));
+ CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op));
// Validate ReductionOperation on intermediate stages
for (unsigned int i = 1; i < num_of_stages - 1; ++i)
{
- ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1],
- &sums_vector[i], axis, op));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1], &sums_vector[i], axis, op));
}
// Validate ReductionOperation on the last stage
const unsigned int last_stage = num_of_stages - 1;
ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(
- input, &sums_vector[last_stage - 1], &not_reshaped_output, axis, op));
+ input, &sums_vector[last_stage - 1], &not_reshaped_output, axis, op));
}
ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(&not_reshaped_output, output));
return Status{};
@@ -144,16 +144,16 @@ void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor *
_reduction_axis = axis;
const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(
- input->info()->tensor_shape(), axis, false);
+ input->info()->tensor_shape(), axis, false);
DataType output_data_type = (output->info()->data_type() == DataType::UNKNOWN)
- ? DataType::S32
- : output->info()->data_type();
+ ? DataType::S32
+ : output->info()->data_type();
auto_init_if_empty(*output->info(), input->info()
- ->clone()
- ->set_tensor_shape(output_shape)
- .set_data_type(output_data_type)
- .reset_padding()
- .set_is_resizable(true));
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
// Configure reduction operation kernels
_reduction_kernels_vector.resize(_num_of_stages);
@@ -166,11 +166,11 @@ void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor *
TensorShape output_shape{input->info()->tensor_shape()};
output_shape.set(axis, 1);
auto_init_if_empty(*_not_reshaped_output.info(), input->info()
- ->clone()
- ->set_tensor_shape(output_shape)
- .set_data_type(output_data_type)
- .reset_padding()
- .set_is_resizable(true));
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
_not_reshaped_output.info()->set_tensor_shape(output_shape);
_reduction_kernels_vector[0].configure(input, nullptr, &_not_reshaped_output, axis, op);
}
@@ -182,7 +182,7 @@ void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor *
{
shape.set(0, ceil(shape.x() / 128.f));
_results_vector[i].allocator()->init(
- input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type));
+ input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type));
}
// Apply ReductionOperation only on first kernel
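A rough illustration of the staging logic above, assuming (from the ceil(shape.x() / 128.f) initialization) that each OpenCL reduction stage collapses blocks of 128 elements along X; the exact stage count is decided by calculate_number_of_stages_only_x_axis, so this is only a shape-level sketch:

#include <cmath>
#include <cstdio>

int main()
{
  unsigned int w = 70000; // hypothetical width of the reduced X axis
  unsigned int stages = 0;
  while (w > 1)
  {
    w = static_cast<unsigned int>(std::ceil(w / 128.f)); // one intermediate tensor per step
    ++stages;
  }
  std::printf("stages needed: %u\n", stages); // 70000 -> 547 -> 5 -> 1 : 3 stages
}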
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
index 3dede0562..6359b4bcb 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp
@@ -53,16 +53,10 @@ namespace arm_compute
using namespace arm_compute::misc::shape_calculator;
CLDirectTransposeConvLayer::CLDirectTransposeConvLayer(
- std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _scale_f(),
- _conv_f(),
- _flip_weights(),
- _scaled_output(),
- _original_weights(nullptr),
- _weights_flipped(),
- _flip_axis(),
- _is_prepared(false)
+ std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)), _scale_f(), _conv_f(), _flip_weights(),
+ _scaled_output(), _original_weights(nullptr), _weights_flipped(), _flip_axis(),
+ _is_prepared(false)
{
}
@@ -74,7 +68,7 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
+ input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
const DataLayout data_layout = input->data_layout();
@@ -86,8 +80,8 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
auto out_dims = transposeconv_output_dimensions(
- input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
- weights->dimension(idx_h), info, invalid_right, invalid_bottom);
+ input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
+ weights->dimension(idx_h), info, invalid_right, invalid_bottom);
const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
@@ -117,19 +111,19 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen
unsigned int pad_right = 0;
unsigned int pad_top = 0;
unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
+ const TensorShape scale_out_shape =
+ compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right,
+ invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(scale_out_shape)
- .set_data_layout(data_layout));
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(scale_out_shape)
+ .set_data_layout(data_layout));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info));
- ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, weights_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, weights_info));
return Status{};
}
@@ -171,22 +165,22 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte
_flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(idx_w), input->info()->dimension(idx_h),
- weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
- invalid_bottom);
+ input->info()->dimension(idx_w), input->info()->dimension(idx_h),
+ weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+ invalid_bottom);
const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
// Output auto initialization if not yet initialized
auto_init_if_empty(
- *output->info(),
- input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
+ *output->info(),
+ input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
+ input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info,
+ invalid_right, invalid_bottom));
_is_prepared = weights_info.retain_internal_weights();
@@ -195,8 +189,8 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte
// Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
// to match output shape
const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
input->info()->quantization_info());
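The out_dims computed above follow the usual transposed-convolution relation (upsample by the stride, then convolve with stride 1, which is exactly what configure() wires up). A minimal numeric check, assuming a single axis and zero invalid_right/invalid_bottom margins:

#include <cassert>

int main()
{
  const int in = 14, kernel = 3, stride = 2, pad_begin = 0, pad_end = 0;
  // Standard transposed-convolution output size along one axis.
  const int out = stride * (in - 1) + kernel - pad_begin - pad_end;
  assert(out == 29);
}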
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
index 01989461e..79d0929a9 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
@@ -60,7 +60,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
ARM_COMPUTE_UNUSED(weights);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
+ CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
@@ -80,12 +80,12 @@ Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *in
}
CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
- _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
- _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
- _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
- _original_weights(nullptr)
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
+ _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
+ _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
+ _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
+ _original_weights(nullptr)
{
}
void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights,
@@ -107,8 +107,8 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
_accumulate_biases = false;
@@ -140,10 +140,10 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
bool is_fc_after_conv = false;
if (is_batched_fc_layer)
{
- is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -158,28 +158,28 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen
{
// Reshape the weights
_reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
_reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output);
weights_to_use = &_reshape_weights_output;
}
// Extract scale factor
_scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
_memory_group.manage(&_scale_factor);
_scale_factor_kernel.configure(input, &_scale_factor);
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_memory_group.manage(&_quantized_input);
_quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
// GEMMLowp
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
_memory_group.manage(&_gemmlowp_output);
configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output,
fc_info.retain_internal_weights);
@@ -209,15 +209,15 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
const GPUTarget gpu_target = CLScheduler::get().target();
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
+ CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
}
// With the Fully Connected layer we can have 4 different cases:
@@ -247,33 +247,32 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
// Validate Scale factor kernel
const ITensorInfo &scale_factor =
- TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
+ TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
// Validate quantization symm8 kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
ARM_COMPUTE_RETURN_ON_ERROR(
- CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
+ CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
// Fully Connected layer after a Fully Connected Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate matrix multiply kernel
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
// Multiply scale
ARM_COMPUTE_RETURN_ON_ERROR(
- CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
+ CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
return Status{};
}
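Putting the hybrid path's pieces together (per-row scale-factor extraction, symmetric input quantization, GEMMLowp into S32, multiply-by-scale back to float), a scalar sketch of the data flow with illustrative names and shapes:

#include <cstddef>
#include <cstdint>
#include <vector>

// q_in: quantized input rows, row_scale: one scale per input row,
// w: [out_dim][in_dim] symmetric int8 weights with a single scale w_scale.
std::vector<std::vector<float>> hybrid_fc(const std::vector<std::vector<std::int8_t>> &q_in,
                                          const std::vector<float> &row_scale,
                                          const std::vector<std::vector<std::int8_t>> &w,
                                          float w_scale)
{
  std::vector<std::vector<float>> out(q_in.size(), std::vector<float>(w.size(), 0.f));
  for (std::size_t b = 0; b < q_in.size(); ++b)
    for (std::size_t o = 0; o < w.size(); ++o)
    {
      std::int32_t acc = 0; // one element of the S32 _gemmlowp_output tensor
      for (std::size_t i = 0; i < w[o].size(); ++i)
        acc += static_cast<std::int32_t>(q_in[b][i]) * static_cast<std::int32_t>(w[o][i]);
      out[b][o] = acc * row_scale[b] * w_scale; // the multiply-scale-factor step
    }
  return out;
}

int main()
{
  const std::vector<std::vector<std::int8_t>> q_in{{64, -32}};
  const std::vector<float> row_scale{0.01f};
  const std::vector<std::vector<std::int8_t>> w{{1, 2}, {3, 4}};
  auto out = hybrid_fc(q_in, row_scale, w, 0.5f);
  // out[0][0] == (64*1 - 32*2) * 0.01 * 0.5 == 0.0
  // out[0][1] == (64*3 - 32*4) * 0.01 * 0.5 == 0.32
  (void)out;
}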
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
index 2ff4b9659..13d3acbac 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
@@ -79,7 +79,7 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
int output_multiplier = 0;
int output_shift = 0;
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(
- multiplier, &output_multiplier, &output_shift));
+ multiplier, &output_multiplier, &output_shift));
// Set the GEMMLowp output stage info
gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
@@ -99,7 +99,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
{
GEMMLowpOutputStageInfo gemmlowp_output_stage;
ARM_COMPUTE_RETURN_ON_ERROR(
- construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
+ construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
false, // is_b_reshaped
@@ -125,14 +125,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
- gemm_info));
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
+ gemm_info));
}
else
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
+ CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
}
return Status{};
@@ -154,12 +154,12 @@ Status CLFullyConnectedLayerReshapeWeightsEx::validate(const ITensorInfo *input,
CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager,
IWeightsManager *weights_manager)
- : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
- _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
- _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
- _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
- _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
- _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
+ _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
+ _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
+ _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
+ _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
+ _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
{
}
void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights,
@@ -190,9 +190,9 @@ void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTens
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, bias, output, gemm_info);
@@ -214,8 +214,8 @@ void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const IC
const FullyConnectedLayerInfo &fc_info)
{
ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
// If the fully connected layer is called after a convolution layer, the input tensor must be
// linearized
@@ -223,11 +223,11 @@ void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const IC
// Initialize output tensor for flatten
TensorShape shape_flatten = compute_flatten_shape(input->info());
_flatten_output.allocator()->init(input->info()
- ->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(shape_flatten)
- .set_data_layout(DataLayout::NCHW));
+ ->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(shape_flatten)
+ .set_data_layout(DataLayout::NCHW));
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
@@ -258,8 +258,8 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
@@ -285,10 +285,10 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -302,7 +302,7 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
{
_reshape_weights_managed_function.configure(weights);
weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->acquire(weights, &_reshape_weights_managed_function));
+ _weights_manager->acquire(weights, &_reshape_weights_managed_function));
}
else
{
@@ -320,7 +320,7 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor
_convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(),
fc_info.weights_trained_layout);
weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->acquire(weights, &_convert_weights_managed));
+ _weights_manager->acquire(weights, &_convert_weights_managed));
}
else
{
@@ -359,16 +359,16 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
bool is_fc_after_conv = true;
const ITensorInfo &flatten_input = TensorInfo(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(compute_flatten_shape(input))
- .set_data_layout(DataLayout::NCHW));
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(compute_flatten_shape(input))
+ .set_data_layout(DataLayout::NCHW));
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
// With the Fully Connected layer we can have 4 different cases:
// 1) Convolution layer -> Fully Connected layer without batches
@@ -396,7 +396,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
+ CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -404,7 +404,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate convert weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
weights_to_use = &converted_weights;
}
@@ -412,8 +412,8 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Fully Connected layer after a Convolution Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input));
@@ -427,7 +427,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
// Validate matrix multiply kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
+ validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
return Status{};
}
@@ -457,7 +457,7 @@ void CLFullyConnectedLayerEx::run()
if (_weights_manager && _weights_manager->are_weights_managed(cur_weights))
{
_original_weights = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
+ _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
}
else
{
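A quick numeric check of the offset negation performed in configure_mm() above: with the affine encoding real = scale * (q - offset), negating the stored offset only rewrites the same quantity as q + (-offset), which matches the sign convention the GEMMLowp function expects (the reason the code stores QuantizationInfo(scale, -offset) before calling it):

#include <cassert>

int main()
{
  const float scale = 0.5f;
  const int offset = 10, q = 14;
  const float r = scale * static_cast<float>(q - offset);        // affine dequantization
  const float r_neg = scale * static_cast<float>(q + (-offset)); // negated-offset form
  assert(r == r_neg); // both 2.0f: the value is unchanged, only the stored sign flips
}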
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
index 157b4d977..ac6982e6f 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
@@ -41,7 +41,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
// reshape
auto_init_if_empty(*_cl_buffer.info(),
_input->info()->clone()->set_tensor_shape(reshape).set_data_layout(
- _input->info()->data_layout()));
+ _input->info()->data_layout()));
_cl_reshape.configure(_input, &_cl_buffer);
input_to_use = &_cl_buffer;
}
@@ -57,7 +57,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
{
bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
input->info()->data_type() == DataType::F16) &&
- (weights->info()->data_type() == DataType::S8 ||
+ (weights->info()->data_type() == DataType::QSYMM8 ||
weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
@@ -81,7 +81,6 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
{
throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type");
}
-
}();
if (_needs_reshape)
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
index 02ee4ad8a..c246041bb 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
@@ -46,8 +46,8 @@
using namespace arm_compute;
CLReduceOperation::CLReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
- _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
+ : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
+ _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
{
}
@@ -91,13 +91,13 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
for (size_t i = 0; i < num_of_kernels; ++i, ++it)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
+ CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
}
if (!keep_dims)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
+ CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
}
return Status{};
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp
index a502f032e..12c0aa829 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp
@@ -134,8 +134,8 @@ void configure_slices(const ICLTensor *input, const std::vector<ICLTensor *> &ou
// Output auto initialization if not yet initialized

TensorInfo tmp_output_info = *output->info()->clone();
auto_init_if_empty(
- tmp_output_info,
- input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
+ tmp_output_info,
+ input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
// Update coordinate on axis
start_coords.set(split_dim, axis_offset);
@@ -153,7 +153,7 @@ void configure_slices(const ICLTensor *input, const std::vector<ICLTensor *> &ou
} // namespace
CLSplitVEx::CLSplitVEx()
- : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions()
+ : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions()
{
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
index 3ac95a8e6..accd51302 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
@@ -49,14 +49,14 @@ namespace arm_compute
{
CLTopKV2::CLTopKV2()
- : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
- _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
- _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
- _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr),
- _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
- _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
- _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
- _reorder_negatives_kernel(), _store_kernel()*/
+ : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
+ _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
+ _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
+ _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), _p_out_key_buf(nullptr),
+ _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
+ _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
+ _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
+ _reorder_negatives_kernel(), _store_kernel()*/
{
}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
index 3215d01a7..0754fd813 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
@@ -53,7 +53,7 @@ using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;
CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_manager(std::move(memory_manager)), _function()
+ : _memory_manager(std::move(memory_manager)), _function()
{
}
@@ -105,20 +105,20 @@ Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
switch (CLTransposeConvLayer::get_deconvolution_method(
- input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
{
case DeconvolutionMethod::DIRECT:
{
// Validate direct convolution layer
ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate(
- input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
break;
}
case DeconvolutionMethod::GEMM:
{
// Validate gemm-based convolution layer
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
+ CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
break;
}
default:
@@ -130,9 +130,9 @@ Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
}
DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method(
- const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
- ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
- unsigned int invalid_bottom, const WeightsInfo &weights_info)
+ const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
+ ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
ARM_COMPUTE_UNUSED(output, bias, weights_info);
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
index a123439d9..e212a03c7 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
@@ -58,7 +58,7 @@ namespace
Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
+ NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
@@ -78,11 +78,11 @@ Status NEFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *in
}
NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
- _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
- _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
- _accumulate_biases(false), _is_prepared(false)
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
+ _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
+ _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
+ _accumulate_biases(false), _is_prepared(false)
{
}
@@ -103,8 +103,8 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
_accumulate_biases = false;
@@ -132,10 +132,10 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
bool _is_fc_after_conv;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -150,23 +150,23 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
{
// Reshape the weights
_reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
_reshape_weights_function.configure(weights_to_use, &_reshape_weights_output);
weights_to_use = &_reshape_weights_output;
}
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
_quant_input_kernel.configure(input, &_quantized_input, &_scale_factor);
// GEMM
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output);
// Multiply scale
@@ -195,8 +195,8 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr)
@@ -217,7 +217,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -225,20 +225,19 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate quantization kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32);
ARM_COMPUTE_RETURN_ON_ERROR(
- NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
+ NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Validate matrix multiply kernel
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
- &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
+ &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
index cb7557a5a..a639f2979 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
@@ -69,14 +69,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(
- &input, &weights, nullptr, &output, 1.f, 0.0f,
- GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ NEGEMM::validate(&input, &weights, nullptr, &output, 1.f, 0.0f,
+ GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
}
return Status{};
@@ -84,12 +84,12 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
} // namespace
NEFullyConnectedLayerEx::NEFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
- _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
- _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(),
- _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr),
- _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false),
- _accumulate_biases(false), _is_quantized(false), _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
+ _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
+ _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(), _converted_weights_output(),
+ _reshape_weights_output(), _original_weights(nullptr), _are_weights_converted(true),
+ _are_weights_reshaped(false), _is_fc_after_conv(false), _accumulate_biases(false),
+ _is_quantized(false), _is_prepared(false)
{
}
@@ -105,9 +105,9 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -129,8 +129,8 @@ void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITen
ITensor *output)
{
ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
// If the fully connected layer is called after a convolution layer, the input tensor must be
// linearized
@@ -138,8 +138,7 @@ void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITen
// Initialize output tensor for flatten
TensorShape shape_flatten = compute_flatten_shape(input->info());
_flatten_output.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- shape_flatten));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
@@ -169,8 +168,8 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
@@ -183,8 +182,7 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
if (_is_quantized)
{
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
}
// Configure accumulate biases kernel for non quantized asymmetric types
@@ -208,10 +206,10 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
@@ -284,16 +282,16 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
const ITensorInfo &flatten_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_flatten_shape(input)));
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_flatten_shape(input)));
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr && !is_quantized)
@@ -330,7 +328,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
+ NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
@@ -338,7 +336,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Validate convert weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
weights_to_use = &converted_weights;
}
@@ -346,8 +344,8 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
{
// Fully Connected layer after a Convolution Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
@@ -365,7 +363,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor
if (is_quantized)
{
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(
- &gemmlowp_output, biases, output));
+ &gemmlowp_output, biases, output));
}
return Status{};
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
index dc6c78478..234c783f9 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp
@@ -56,7 +56,7 @@ void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input
assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
bool is_hybrid = input->info()->data_type() == DataType::F32 &&
- (weights->info()->data_type() == DataType::S8 ||
+ (weights->info()->data_type() == DataType::QSYMM8 ||
weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
index 16d74e62d..451aa0997 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
@@ -46,9 +46,9 @@
namespace arm_compute
{
NEInstanceNormalizationLayerEx::NEInstanceNormalizationLayerEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
- _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
+ _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
{
}
@@ -88,8 +88,8 @@ Status NEInstanceNormalizationLayerEx::validate(const ITensorInfo *input, const
float epsilon)
{
return NEInstanceNormalizationLayerKernelEx::validate(
- &input->clone()->set_data_layout(DataLayout::NCHW),
- &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
+ &input->clone()->set_data_layout(DataLayout::NCHW),
+ &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
}
void NEInstanceNormalizationLayerEx::run()
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
index cb1a26304..c45c335b3 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
@@ -49,8 +49,8 @@
using namespace arm_compute;
NEReduceOperation::NEReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
+ : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
+ _reduction_ops(), _keep_dims()
{
}
@@ -125,7 +125,7 @@ void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_a
for (unsigned int i = 0; i < _reduction_ops; ++i)
{
TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+ i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
out_shape.set(axis_local[i], 1);
auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
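Each pass of this loop collapses one axis of the previous intermediate to size 1; when keep_dims is false, a final reshape drops those singleton dimensions. A runnable sketch of the shape bookkeeping:

#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> shape = {2, 3, 4, 5};  // input tensor shape
  const std::vector<int> axes = {1, 3};   // axes to reduce, like axis_local
  for (int axis : axes)
  {
    shape[axis] = 1; // out_shape.set(axis_local[i], 1)
    printf("intermediate: %dx%dx%dx%d\n", shape[0], shape[1], shape[2], shape[3]);
  }
  // keep_dims == false: strip the reduced singleton dimensions
  std::vector<int> final_shape;
  for (size_t d = 0; d < shape.size(); ++d)
  {
    bool reduced = false;
    for (int axis : axes)
      reduced |= (static_cast<int>(d) == axis);
    if (!reduced)
      final_shape.push_back(shape[d]);
  }
  printf("final: %dx%d\n", final_shape[0], final_shape[1]); // 2x4
  return 0;
}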
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
index 26a887912..b21717e86 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
@@ -47,8 +47,8 @@
using namespace arm_compute;
NEReduceSum::NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
+ : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
+ _reduction_ops(), _keep_dims()
{
}
@@ -122,7 +122,7 @@ void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, b
for (unsigned int i = 0; i < _reduction_ops; ++i)
{
TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+ i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
out_shape.set(axis_local[i], 1);
auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
@@ -135,7 +135,7 @@ void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, b
_reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(),
input->info()->data_type(),
input->info()->quantization_info())
- .set_data_layout(input->info()->data_layout()));
+ .set_data_layout(input->info()->data_layout()));
_memory_group.manage(&_reduced_outs[i]);
_reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i],
ReductionOperation::SUM);
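Note that each intermediate TensorInfo is re-initialized with the input's data layout preserved (the set_data_layout call above). The kernel each step configures is a plain sum over one axis; a minimal 3-D sketch of that reduction:

#include <cstddef>
#include <vector>

// Sum a (d0, d1, d2) buffer over axis 1, producing shape (d0, 1, d2).
std::vector<float> reduce_sum_axis1(const std::vector<float> &in, size_t d0,
                                    size_t d1, size_t d2)
{
  std::vector<float> out(d0 * d2, 0.f);
  for (size_t i = 0; i < d0; ++i)
    for (size_t j = 0; j < d1; ++j)
      for (size_t k = 0; k < d2; ++k)
        out[i * d2 + k] += in[(i * d1 + j) * d2 + k];
  return out;
}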
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
index aa165cc15..50311071b 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
@@ -51,17 +51,9 @@ namespace arm_compute
{
NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _conv_f(),
- _upsample_f(),
- _flip_weights(),
- _scaled_output(),
- _weights_flipped(),
- _flip_axis(),
- _original_weights(nullptr),
- _input(nullptr),
- _info(),
- _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _conv_f(), _upsample_f(), _flip_weights(),
+ _scaled_output(), _weights_flipped(), _flip_axis(), _original_weights(nullptr), _input(nullptr),
+ _info(), _is_prepared(false)
{
}
@@ -76,15 +68,15 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
const unsigned int width_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
const unsigned int height_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx));
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1);
auto out_dims = transposeconv_output_dimensions(
- input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
- weights->dimension(height_idx), info, invalid_right, invalid_bottom);
+ input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
+ weights->dimension(height_idx), info, invalid_right, invalid_bottom);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
if (bias != nullptr)
@@ -117,24 +109,24 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf
unsigned int pad_right = 0;
unsigned int pad_top = 0;
unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
+ const TensorShape scale_out_shape =
+ compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right,
+ invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(
- input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
+ input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const unsigned int batches_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
const unsigned int channel_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) !=
scale_out_info.dimension(batches_idx));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) !=
scale_out_info.dimension(channel_idx));
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, WeightsInfo()));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo()));
return Status{};
}
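Both validate() and configure() lean on transposeconv_output_dimensions. Assuming the standard transposed-convolution size formula, with invalid_right/invalid_bottom trimming the excess (an assumption; the exact handling lives in ShapeCalculatorEx.h), the arithmetic is:

#include <cstdio>

// Assumed formula, for illustration: stride * (in - 1) + kernel, minus the
// total padding and the "invalid" trim on that dimension.
unsigned transposeconv_out_dim(unsigned in, unsigned kernel, unsigned stride,
                               unsigned pad_total, unsigned invalid)
{
  return stride * (in - 1) + kernel - pad_total - invalid;
}

int main()
{
  // e.g. 4x4 input, 3x3 kernel, stride 2, no padding -> 9x9 output
  printf("%u\n", transposeconv_out_dim(4, 3, 2, 0, 0)); // prints 9
  return 0;
}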
@@ -146,21 +138,21 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
- input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
+ input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
const DataLayout data_layout = input->info()->data_layout();
const unsigned int width_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const unsigned int height_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(width_idx), input->info()->dimension(height_idx),
- weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
- invalid_right, invalid_bottom);
+ input->info()->dimension(width_idx), input->info()->dimension(height_idx),
+ weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
+ invalid_right, invalid_bottom);
const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
_input = input;
_original_weights = weights;
@@ -188,8 +180,8 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
DimensionRoundingType::FLOOR);
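configure() realizes the transposed convolution as zero-insertion upsampling (_upsample_f, using the FLOOR-rounded upsample_info above) followed by a stride-1 convolution against the flipped weights (_flip_weights feeding _conv_f). A 1-D sketch of that decomposition, ignoring padding:

#include <cstddef>
#include <vector>

std::vector<float> transpose_conv_1d(const std::vector<float> &in,
                                     const std::vector<float> &kernel,
                                     int stride)
{
  // (a) Upsample: place inputs stride apart, zeros in between (_upsample_f).
  std::vector<float> up((in.size() - 1) * stride + 1, 0.f);
  for (size_t i = 0; i < in.size(); ++i)
    up[i * stride] = in[i];

  // (b) ConvolutionLayer computes a cross-correlation, so the function flips
  // the weights first; flipped cross-correlation equals the true convolution
  // computed below over the zero-padded upsampled signal.
  const int k = static_cast<int>(kernel.size());
  std::vector<float> out(up.size() + k - 1, 0.f);
  for (size_t o = 0; o < out.size(); ++o)
    for (int j = 0; j < k; ++j)
    {
      const int idx = static_cast<int>(o) - j;
      if (idx >= 0 && idx < static_cast<int>(up.size()))
        out[o] += kernel[j] * up[idx];
    }
  return out; // length == stride * (in.size() - 1) + kernel.size()
}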