summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2021-02-02 10:54:26 +0000
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2021-02-02 14:33:51 +0000
commit: 74a142c11ec0b2f2b3fe1feb3fdfd98e9190762e (patch)
tree: 29e769901e8c6841c9926ea88ef7fa7db6be53ec
parent: ea857273d8b4a94fb7f1e63ce9068a60259fb9d3 (diff)
download: armcl-74a142c11ec0b2f2b3fe1feb3fdfd98e9190762e.tar.gz
armcl-74a142c11ec0b2f2b3fe1feb3fdfd98e9190762e.tar.bz2
armcl-74a142c11ec0b2f2b3fe1feb3fdfd98e9190762e.zip
Account for possible padding in Pooling assembly kernels
Resolves: COMPMID-4217
Change-Id: I8426a494ab0d3c6479de977cdab803d35bbe1e40
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4967
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp 16
1 files changed, 15 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp b/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp
index 7e58eaf44..04406663f 100644
--- a/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp
+++ b/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp
@@ -148,7 +148,21 @@ void NEPoolingAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window &
auto out_ptr = output->buffer() + output->info()->offset_first_element_in_bytes();
auto working_space = workspace->buffer() + workspace->info()->offset_first_element_in_bytes();
- _kernel_asm->execute(in_ptr, out_ptr, working_space, info.thread_id, info.num_threads);
+ const auto input_shape = input->info()->tensor_shape();
+ const auto output_shape = output->info()->tensor_shape();
+ const auto input_padding = input->info()->padding();
+ const auto output_padding = output->info()->padding();
+
+ const size_t ld_input_col = input_shape[0] + input_padding.left + input_padding.right;
+ const size_t ld_input_row = ld_input_col * (input_shape[1] + input_padding.top + input_padding.bottom);
+ const size_t ld_input_batch = ld_input_row * input_shape[2];
+ const size_t ld_output_col = output_shape[0] + output_padding.right;
+ const size_t ld_output_row = ld_output_col * (output_shape[1] + output_padding.top + output_padding.bottom);
+ const size_t ld_output_batch = ld_output_row * output_shape[2];
+
+ _kernel_asm->execute(in_ptr, ld_input_col, ld_input_row, ld_input_batch,
+ out_ptr, ld_output_col, ld_output_row, ld_output_batch,
+ working_space, info.thread_id, info.num_threads);
}
size_t NEPoolingAssemblyWrapperKernel::get_working_size(unsigned int num_threads) const