diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2019-12-12 16:16:09 +0000 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2019-12-19 16:58:40 +0000 |
commit | e7be8a072967f9ae547468a7625e11477ea32221 (patch) | |
tree | 1ea27ef9ac9d4896decfac4e5431e80ec84e3885 | |
parent | 62bdd8c4d605d75214ac3ca674cd647911ea9bbc (diff) | |
download | armcl-e7be8a072967f9ae547468a7625e11477ea32221.tar.gz armcl-e7be8a072967f9ae547468a7625e11477ea32221.tar.bz2 armcl-e7be8a072967f9ae547468a7625e11477ea32221.zip |
COMPMID-2980 (Nightly) armv7a build failures
Change-Id: I8c2a20fc345694d1ad6e0fe63e4f22fb73e6c1df
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2463
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | arm_compute/core/CPP/Validate.h | 12 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp | 13 | ||||
-rw-r--r-- | arm_compute/core/PixelValue.h | 9 | ||||
-rw-r--r-- | arm_compute/core/Types.h | 10 | ||||
-rw-r--r-- | arm_compute/core/Utils.h | 20 | ||||
-rw-r--r-- | arm_compute/core/utils/quantization/AsymmHelpers.h | 6 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp | 4 | ||||
-rw-r--r-- | src/core/utils/quantization/AsymmHelpers.cpp | 36 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 6 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 6 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 20 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NELSTMLayerQuantized.cpp | 8 | ||||
-rw-r--r-- | src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp | 14 |
13 files changed, 83 insertions, 81 deletions
diff --git a/arm_compute/core/CPP/Validate.h b/arm_compute/core/CPP/Validate.h index 1ec41a9f1..f195a31d0 100644 --- a/arm_compute/core/CPP/Validate.h +++ b/arm_compute/core/CPP/Validate.h @@ -37,15 +37,15 @@ namespace arm_compute * * @return Status */ -inline arm_compute::Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) +inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, + const ITensorInfo *tensor_info) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); #ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16, function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above"); #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - return arm_compute::Status {}; + return Status {}; } /** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in. @@ -57,12 +57,12 @@ inline arm_compute::Status error_on_unsupported_cpu_fp16(const char *function, c * * @return Status */ -inline arm_compute::Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensor *tensor) +inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, + const ITensor *tensor) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info())); - return arm_compute::Status{}; + return Status{}; } #define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp index ef3adc4c0..37c1f1bc8 100644 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp +++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp @@ -31,31 +31,21 @@ using namespace neon_convolution_kernels; using namespace qasymm8; -template <typename T, typename U = int32_t> -inline T saturating_doubling_high_mul(const T&, const U&); - -template <> inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32x4_t& b) { return vqrdmulhq_s32(a, b); } -template <> inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b) { return vqrdmulhq_n_s32(a, b); } -template <> inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b) { return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0); } -template <typename T, typename U = int32_t> -inline T rounding_divide_by_exp2(const T& x, const U exponent); - -template <> inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shift) { const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); @@ -63,7 +53,6 @@ inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shi return vrshlq_s32(fixed, shift); } -template <> inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent) { const int32x4_t shift = vdupq_n_s32(-exponent); @@ -72,7 +61,6 @@ inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent) return vrshlq_s32(fixed, shift); } -template <> inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent) { const int32x2_t shift = vdup_n_s32(-exponent); @@ -81,7 +69,6 @@ inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent) return vrshl_s32(fixed, shift); } -template <> inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent) { const int32x2_t xs = vdup_n_s32(x); diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index 8c2ab92ad..52b1f654a 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -103,6 +103,15 @@ public: break; } } + /** Initialize the union with a S8 pixel value + * + * @param[in] v S8 value. + */ + PixelValue(int8_t v) + : PixelValue() + { + value.s8 = v; + } /** Initialize the union with a U8 pixel value * * @param[in] v U8 value. diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index c2813122b..901d080b0 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1876,11 +1876,11 @@ enum class GEMMLowpOutputStageType struct GEMMLowpOutputStageInfo { GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */ - int gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */ - int gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ - int gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */ - int gemmlowp_min_bound{ 0 }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */ - int gemmlowp_max_bound{ 0 }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */ + int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */ + int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ + int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */ + int32_t gemmlowp_min_bound{ 0 }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */ + int32_t gemmlowp_max_bound{ 0 }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */ std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */ diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index c11fffec9..18c5471f8 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -557,15 +557,15 @@ inline DataType get_promoted_data_type(DataType dt) */ inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt) { - PixelValue min(0); - PixelValue max(0); + PixelValue min{}; + PixelValue max{}; switch(dt) { case DataType::U8: case DataType::QASYMM8: { - min = PixelValue(std::numeric_limits<uint8_t>::lowest()); - max = PixelValue(std::numeric_limits<uint8_t>::max()); + min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max())); break; } case DataType::S8: @@ -573,22 +573,22 @@ inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt) case DataType::QASYMM8_SIGNED: case DataType::QSYMM8_PER_CHANNEL: { - min = PixelValue(std::numeric_limits<int8_t>::lowest()); - max = PixelValue(std::numeric_limits<int8_t>::max()); + min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max())); break; } case DataType::U16: case DataType::QASYMM16: { - min = PixelValue(std::numeric_limits<uint16_t>::lowest()); - max = PixelValue(std::numeric_limits<uint16_t>::max()); + min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max())); break; } case DataType::S16: case DataType::QSYMM16: { - min = PixelValue(std::numeric_limits<int16_t>::lowest()); - max = PixelValue(std::numeric_limits<int16_t>::max()); + min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max())); break; } case DataType::U32: diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h index f0b077096..1bdc9959c 100644 --- a/arm_compute/core/utils/quantization/AsymmHelpers.h +++ b/arm_compute/core/utils/quantization/AsymmHelpers.h @@ -40,7 +40,7 @@ namespace quantization * * @return a status */ -Status calculate_quantized_multiplier(float multiplier, int *quant_multiplier, int *shift); +Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift); /** Calculate quantized representation of multiplier with value less than one. * * @param[in] multiplier Real multiplier. @@ -49,7 +49,7 @@ Status calculate_quantized_multiplier(float multiplier, int *quant_multiplier, i * * @return a status */ -Status calculate_quantized_multiplier_less_than_one(float multiplier, int *quant_multiplier, int *right_shift); +Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift); /** Calculate quantized representation of multiplier having value greater than one. * * @param[in] multiplier Real multiplier. @@ -58,7 +58,7 @@ Status calculate_quantized_multiplier_less_than_one(float multiplier, int *quant * * @return a status */ -Status calculate_quantized_multiplier_greater_than_one(float multiplier, int *quantized_multiplier, int *left_shift); +Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift); /** Calculate quantized representation of per-channel multipliers with value less than one. * diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp index a0d45afd2..a9a3183c5 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp @@ -528,8 +528,8 @@ void NEDepthwiseConvolutionLayerNativeKernel::configure(const ITensor *input, co for(size_t i = 0; i < weights_scale.size(); ++i) { - int out_mult = 0; - int out_shift = 0; + int32_t out_mult = 0; + int32_t out_shift = 0; const float multiplier = input_scale * weights_scale.at(i) / output_scale; ARM_COMPUTE_ERROR_ON(multiplier > 1.f); arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &out_mult, &out_shift); diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp index 619666244..11241e83a 100644 --- a/src/core/utils/quantization/AsymmHelpers.cpp +++ b/src/core/utils/quantization/AsymmHelpers.cpp @@ -35,7 +35,7 @@ namespace quantization constexpr int64_t fixed_point_one_Q0 = (1LL << 31); constexpr float epsilon = 0.00001f; -Status calculate_quantized_multiplier(float multiplier, int *quant_multiplier, int *shift) +Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift) { if(multiplier >= 1.f) { @@ -49,9 +49,9 @@ Status calculate_quantized_multiplier(float multiplier, int *quant_multiplier, i } } -Status calculate_quantized_multiplier_less_than_one(float multiplier, - int *quant_multiplier, - int *right_shift) +Status calculate_quantized_multiplier_less_than_one(float multiplier, + int32_t *quant_multiplier, + int32_t *right_shift) { ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr); ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr); @@ -64,9 +64,10 @@ Status calculate_quantized_multiplier_less_than_one(float multiplier, return Status{}; } - const double q = std::frexp(multiplier, right_shift); - *right_shift *= -1; - auto q_fixed = static_cast<int64_t>(support::cpp11::round(q * fixed_point_one_Q0)); + int shift_exp = 0; + const double q = std::frexp(multiplier, &shift_exp); + *right_shift = -1 * shift_exp; + auto q_fixed = static_cast<int64_t>(support::cpp11::round(q * fixed_point_one_Q0)); ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0); if(q_fixed == fixed_point_one_Q0) { @@ -80,15 +81,18 @@ Status calculate_quantized_multiplier_less_than_one(float multiplier, return Status{}; } -Status calculate_quantized_multiplier_greater_than_one(float multiplier, - int *quantized_multiplier, - int *left_shift) +Status calculate_quantized_multiplier_greater_than_one(float multiplier, + int32_t *quantized_multiplier, + int32_t *left_shift) { ARM_COMPUTE_RETURN_ERROR_ON(quantized_multiplier == nullptr); ARM_COMPUTE_RETURN_ERROR_ON(left_shift == nullptr); ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 1.f); - const double q = std::frexp(multiplier, left_shift); - auto q_fixed = static_cast<int64_t>(support::cpp11::round(q * fixed_point_one_Q0)); + + int shift_exp = 0; + const double q = std::frexp(multiplier, &shift_exp); + *left_shift = shift_exp; + auto q_fixed = static_cast<int64_t>(support::cpp11::round(q * fixed_point_one_Q0)); ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0); if(q_fixed == fixed_point_one_Q0) { @@ -125,8 +129,8 @@ arm_compute::Status calculate_quantized_multipliers_less_than_one(const Quantiza for(unsigned int i = 0; i < size; ++i) { const float multiplier = i_scale * w_scales[i] / o_scale; - int quant_multiplier = 0; - int quant_shift = 0; + int32_t quant_multiplier = 0; + int32_t quant_shift = 0; ARM_COMPUTE_RETURN_ON_ERROR(calculate_quantized_multiplier_less_than_one(multiplier, &quant_multiplier, &quant_shift)); quant_multipliers[i] = quant_multiplier; quant_shifts[i] = quant_shift; @@ -181,8 +185,8 @@ void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, for(unsigned int i = 0; i < num_filters; ++i) { - int output_multiplier = 0; - int output_shift = 0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; const float multiplier = iq_info.scale * wq_info.scale()[i] / oq_info.scale; calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index 5e47dd56a..ca4fe732a 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -202,9 +202,9 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal:: const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform(); - float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; - int output_multiplier; - int output_shift; + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; + int32_t output_multiplier; + int32_t output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset); _accumulator.allocator()->allocate(); diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index ee622f469..b3b90f859 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -255,9 +255,9 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); - float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; - int output_multiplier; - int output_shift; + float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale; + int32_t output_multiplier; + int32_t output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset); _gemmlowp_output.allocator()->allocate(); diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index bb9620b29..0507c6b2b 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -33,7 +33,8 @@ #include <set> #include <tuple> -using namespace arm_compute; +namespace arm_compute +{ using namespace arm_compute::misc::shape_calculator; NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights() @@ -131,11 +132,11 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w } // Merge activation with output stage - PixelValue type_min = 0; - PixelValue type_max = 0; + PixelValue type_min{}; + PixelValue type_max{}; std::tie(type_min, type_max) = get_min_max(data_type); - int min_activation = type_min.get<int>(); - int max_activation = type_max.get<int>(); + int32_t min_activation = type_min.get<int32_t>(); + int32_t max_activation = type_max.get<int32_t>(); if(supported_acts.count(act_info.activation()) != 0) { @@ -190,11 +191,11 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens const UniformQuantizationInfo uoqinfo = oqinfo.uniform(); // Merge activation with output stage - PixelValue type_min = 0; - PixelValue type_max = 0; + PixelValue type_min{}; + PixelValue type_max{}; std::tie(type_min, type_max) = get_min_max(data_type); - int min_activation = type_min.get<int>(); - int max_activation = type_max.get<int>(); + int32_t min_activation = type_min.get<int32_t>(); + int32_t max_activation = type_max.get<int32_t>(); const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU, ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, @@ -595,3 +596,4 @@ void NEGEMMConvolutionLayer::prepare() _is_prepared = true; } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp index cfd996b53..cdfc03540 100644 --- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp +++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp @@ -136,8 +136,8 @@ void NELSTMLayerQuantized::configure(const ITensor *input, _output_lowp.allocator()->init(TensorInfo(_output_highp.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_3)); const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale; - int output_multiplier = 0; - int output_shift = 0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); _memory_group.manage(&_output_lowp); @@ -342,8 +342,8 @@ Status NELSTMLayerQuantized::validate(const ITensorInfo *input, const TensorInfo output_lowp(output_highp.tensor_shape(), 1, DataType::QSYMM16, qsymm_3); const float multiplier = 4096.f * qasymm.uniform().scale * qweights.uniform().scale; - int output_multiplier = 0; - int output_shift = 0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); // _output_stage diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp index c564e22d4..3235eee19 100644 --- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp @@ -263,8 +263,8 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor // Calculate rescale parameters const float fmultipler = iqinfo.scale * wqinfo.scale / oqinfo.scale; - int qmultiplier = 0; - int qshift = 0; + int32_t qmultiplier = 0; + int32_t qshift = 0; quantization::calculate_quantized_multiplier_less_than_one(fmultipler, &qmultiplier, &qshift); qasymm8::QAsymm8RescaleParams rescale_params(qshift, qmultiplier, fmultipler); @@ -285,15 +285,15 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor const qasymm8::QAsymm8Params oqinfo{ static_cast<uint8_t>(output_qinfo.offset), output_qinfo.scale }; // Calculate rescale parameters - std::vector<float> fmultipliers; - std::vector<int> qmultipliers; - std::vector<int> qshifts; + std::vector<float> fmultipliers; + std::vector<int32_t> qmultipliers; + std::vector<int32_t> qshifts; for(auto const s : wqinfo.scales) { const float fmultipler = iqinfo.scale * s / oqinfo.scale; - int qmultiplier = 0; - int qshift = 0; + int32_t qmultiplier = 0; + int32_t qshift = 0; quantization::calculate_quantized_multiplier_less_than_one(fmultipler, &qmultiplier, &qshift); fmultipliers.push_back(fmultipler); qmultipliers.push_back(qmultiplier); |