diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-02-15 20:42:39 +0000 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2021-02-16 11:59:52 +0000 |
commit | 274733dbd323321a9c09668e4f60396bef150e39 (patch) | |
tree | 46395a6fe0d99aecbb2b17d99802b4f134637252 | |
parent | cab1ab92813a346779bacd728ef8d7d4159abac6 (diff) | |
download | armcl-274733dbd323321a9c09668e4f60396bef150e39.tar.gz armcl-274733dbd323321a9c09668e4f60396bef150e39.tar.bz2 armcl-274733dbd323321a9c09668e4f60396bef150e39.zip |
Handle Conv2d layer with implicit output padding in NHWC
Corner cases exist when output top/bottom padding is non-zero for
Convolution Layer. This can cause invalid output from the
NEGEMMConvolutionLayer as assembly kernel integration does not
efficiently handle such cases.
As a workaround we always allocate a memory-managed auxiliary tensor
which we use as an output for GEMM when padding exists and then we copy
to the padded output. If no padding exists we import the output tensor
memory to the temporary buffer and perform calculation as we did before.
Resolves: COMPMID-4114
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: If82d0e115b8369b91d775895d5315b044306cc74
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5083
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h | 2 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h | 4 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 36 |
3 files changed, 33 insertions, 9 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index aadc42986..65c2ef7e0 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -275,10 +275,12 @@ private: NEReshapeLayer _reshape_layer; const ITensor *_original_weights; + const ITensor *_original_output; Tensor _im2col_output; Tensor _weights_reshaped; Tensor _gemm_output; + Tensor _gemm_output_3d; Tensor _tmp_output; DataLayout _data_layout; diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h index 466e60183..381fa4de3 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -117,7 +117,7 @@ public: void run() override; private: - std::unique_ptr<IFallback> _arm_gemm; /** Interface for the arm_gemm fallback */ + std::unique_ptr<IFallback> _arm_gemm; /**< Interface for the arm_gemm fallback */ MemoryGroup _memory_group; /**< Function memory group */ IWeightsManager *_weights_manager; /**< Pointer to the weights manager */ }; diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 8fc788c40..74ef3eef5 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -109,8 +109,8 @@ NEGEMMConvolutionLayer::~NEGEMMConvolutionLayer() = default; NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), - _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _data_layout(DataLayout::NCHW), _skip_im2col(false), - _skip_col2im(false), _is_quantized(false), _is_prepared(false) + _col2im_kernel(), _reshape_layer(), _original_weights(nullptr), _original_output(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(), _tmp_output(), + _data_layout(DataLayout::NCHW), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_prepared(false) { } @@ -281,6 +281,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig _is_prepared = weights_info.retain_internal_weights(); _original_weights = weights; + _original_output = output; _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); _data_layout = data_layout; _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1); @@ -368,6 +369,15 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig // Update GEMM output gemm_output_to_use = &_gemm_output; } + else + { + _gemm_output.allocator()->init(*output->info()); + _memory_group.manage(&_gemm_output); + _gemm_output_3d.allocator()->init(*output->info()); + + // Update GEMM output + gemm_output_to_use = &_gemm_output_3d; + } // Configure GEMM // In case we need to skip col2im, GEMM3D (gemm_3d_depth != 0) must be called in order to avoid reshaping the 
output matrix @@ -393,16 +403,18 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig _reshape_layer.configure(gemm_output_to_use, output); } } + else + { + // Configure reshape layer + _reshape_layer.configure(gemm_output_to_use, output); + } if(_is_quantized && !_skip_col2im) { _tmp_output.allocator()->allocate(); } - if(!_skip_col2im || _is_quantized) - { - _gemm_output.allocator()->allocate(); - } + _gemm_output.allocator()->allocate(); ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h), "Output shape does not match the expected one"); @@ -554,6 +566,8 @@ void NEGEMMConvolutionLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); + bool out_has_padding = _skip_col2im && (_original_output->info()->padding().bottom != 0 || _original_output->info()->padding().top != 0); + if(!_skip_im2col) { // Run input reshaping @@ -561,6 +575,10 @@ void NEGEMMConvolutionLayer::run() NEScheduler::get().schedule(_im2col_kernel.get(), y_dim); } + // Handle the case where output has top/bottom padding + const ITensor *out_to_use = out_has_padding ? &_gemm_output : _original_output; + _gemm_output_3d.allocator()->import_memory(out_to_use->buffer()); + // Runs NEGEMM or NEGEMMLowpMatrixMultiplyCore functions if(_is_quantized) { @@ -585,6 +603,10 @@ void NEGEMMConvolutionLayer::run() _reshape_layer.run(); } } + else if(out_has_padding) + { + _reshape_layer.run(); + } } void NEGEMMConvolutionLayer::prepare() |