summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2021-02-10 11:54:47 +0000
committerGiorgio Arena <giorgio.arena@arm.com>2021-02-10 16:10:32 +0000
commitedc524ef7ed38e0521c874f28bb9a1f2407b44c6 (patch)
tree64eab5750f0284ddc184f44a5e5b6e8b7472d59e
parentdb35345753e4ba81384c8a92ece6a8f598fd841a (diff)
downloadarmcl-edc524ef7ed38e0521c874f28bb9a1f2407b44c6.tar.gz
armcl-edc524ef7ed38e0521c874f28bb9a1f2407b44c6.tar.bz2
armcl-edc524ef7ed38e0521c874f28bb9a1f2407b44c6.zip
Revert changes on tensor's strides and fix CLDepthwiseConvolution 3x3 Quantized
- Revert changes in strides > num_dimensions. Set them to 0 - Fix offset calculcation in depthwise 3x3 quantized using select and stride_y for max offset Resolve COMPMID-4254 Change-Id: Ia99b9637f18b99b1fa3d4b7b4892046027d3e7e5 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5040 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/depthwise_convolution_quantized.cl23
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp4
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp4
-rw-r--r--src/core/helpers/Utils.h24
-rw-r--r--tests/validation/UNIT/TensorInfo.cpp10
11 files changed, 28 insertions, 61 deletions
diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
index 285c00a71..c7fe401f8 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
@@ -910,9 +910,7 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc(
// A "-1" 32 bit signed variable converted to unsigned gives 4294967295
z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
-
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
@@ -925,8 +923,7 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc(
// Moreover z_coord cannot be out-of-bound for z = 1 so we do not need to clamp the offset
z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP + DILATION_Y;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values3 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
@@ -938,8 +935,7 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc(
// Offset can be out-of-bound so we need to check if it is greater than max_offset
z_coord = z * (int)CONV_STRIDE_Y - (int)CONV_PAD_TOP + DILATION_Y * 2;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values6 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
@@ -1156,9 +1152,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc(
// A "-1" 32 bit signed variable converted to unsigned gives 4294967295
z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
-
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
@@ -1171,8 +1165,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc(
// z == 1
z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 1;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values4 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
@@ -1185,8 +1178,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc(
// z == 2
z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 2;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values8 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
@@ -1199,8 +1191,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc(
// z == 3
z_coord = z * (int)NUM_PLANES_PROCESSED - (int)CONV_PAD_TOP + 3;
z_coord = min((uint)z_coord, (uint)SRC_DIM_2);
- offset = y_offset + (int4)(z_coord * src_stride_z);
- offset = min(offset, (int4)max_offset);
+ offset = select(y_offset + (int4)(z_coord * src_stride_z), (int4)max_offset, (int4)z_coord < 0 || (int4)z_coord >= SRC_DIM_2);
VEC_TYPE(VEC_SIZE)
values12 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(src_addr + offset.s0));
VEC_TYPE(VEC_SIZE)
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 43c3ff3bf..d13afd201 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -438,8 +438,8 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com
// |__________________|
// | pad_bottom |
// |******************|
- const int max_offset = _input->info()->strides_in_bytes().z() * _input->info()->dimension(2) - (_input->info()->padding().bottom + _input->info()->padding().top) *
- _input->info()->strides_in_bytes().y();
+ const int max_offset = ((_input->info()->dimension(1) * _input->info()->dimension(2)) + (_input->info()->padding().bottom + _input->info()->padding().top) * (_input->info()->dimension(
+ 2) - 1)) * _input->info()->strides_in_bytes().y();
_kernel.setArg(idx, max_offset);
}
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
index 5633ee5a2..9215fd602 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp
@@ -284,8 +284,8 @@ void CLGEMMLowpMatrixMultiplyNativeKernel::run(const Window &window, cl::Command
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
index 3043e0151..848f272e5 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp
@@ -254,8 +254,8 @@ void CLGEMMLowpMatrixMultiplyReshapedKernel::run(const Window &window, cl::Comma
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 0122e3ba4..eba52b08b 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -488,8 +488,8 @@ void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::run(const Window &window, cl
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index a653e29f8..6d3b1e589 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -484,8 +484,8 @@ void CLGEMMMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &que
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
index fefcd2f74..f07166e4b 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
@@ -348,8 +348,8 @@ void CLGEMMMatrixMultiplyNativeKernel::run(const Window &window, cl::CommandQueu
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
index 8a403555f..9f1ffa48e 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp
@@ -359,8 +359,8 @@ void CLGEMMMatrixMultiplyReshapedKernel::run(const Window &window, cl::CommandQu
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
Window slice = window.first_slice_window_3D();
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
index de986de9f..3dee4f24c 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -352,8 +352,8 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::run(const Window &window, cl::Co
if(_input1->info()->num_dimensions() < 3)
{
- // The stride_w for matrix B must be the same as stride_z if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != _input1->info()->strides_in_bytes()[2]);
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y ? 3u : 2u;
diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h
index d64eddb9a..326dc962c 100644
--- a/src/core/helpers/Utils.h
+++ b/src/core/helpers/Utils.h
@@ -50,30 +50,6 @@ inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fix
strides.set(i, shape[i - 1] * strides[i - 1]);
}
- size_t first_zero = std::distance(strides.begin(), std::find_if(strides.begin(), strides.end(), [](uint32_t val)
- {
- return val == 0U;
- }));
-
- if(first_zero > 0)
- {
- if(first_zero == 1)
- {
- strides.set(1, strides[0] * (shape[0] + info.padding().left + info.padding().right));
- ++first_zero;
- }
- else if(first_zero == 2)
- {
- strides.set(2, strides[1] * (shape[1] + info.padding().top + info.padding().bottom));
- ++first_zero;
- }
-
- for(size_t i = first_zero; i < Strides::num_max_dimensions; ++i)
- {
- strides.set(i, strides[first_zero - 1]);
- }
- }
-
return strides;
}
diff --git a/tests/validation/UNIT/TensorInfo.cpp b/tests/validation/UNIT/TensorInfo.cpp
index 44c934238..cf9dfeabe 100644
--- a/tests/validation/UNIT/TensorInfo.cpp
+++ b/tests/validation/UNIT/TensorInfo.cpp
@@ -60,11 +60,11 @@ DATA_TEST_CASE(AutoPadding, framework::DatasetMode::ALL, zip(zip(zip(
PaddingSize{ 4, 36, 4, 4 }})),
framework::dataset::make("Strides", {
Strides{},
- Strides{ 1U, 50U, 50U, 50U, 50U, 50U },
- Strides{ 1U, 50U, 900U, 900U, 900U, 900U },
- Strides{ 1U, 50U, 900U, 900U, 900U, 900U },
- Strides{ 1U, 50U, 900U, 9000U, 9000U, 9000U },
- Strides{ 1U, 50U, 900U, 9000U, 90000U, 90000U },
+ Strides{ 1U, 50U },
+ Strides{ 1U, 50U },
+ Strides{ 1U, 50U, 900U },
+ Strides{ 1U, 50U, 900U, 9000U },
+ Strides{ 1U, 50U, 900U, 9000U, 90000U },
Strides{ 1U, 50U, 900U, 9000U, 90000U, 900000U }})),
framework::dataset::make("Offset", { 0U, 4U, 204U, 204U, 204U, 204U, 204U })),
shape, auto_padding, strides, offset)