From 6a00ecae67a95cf39e1961aaddc3be1f5a828bb4 Mon Sep 17 00:00:00 2001
From: Jonathan L Long
Date: Sun, 20 Sep 2015 15:31:59 -0700
Subject: fix broken conv/deconv reshaping caused by reading bottom shape in
 LayerSetUp

This also eliminates the extra copying of bottom's shape.
---
 include/caffe/vision_layers.hpp      |  7 +++++--
 src/caffe/layers/base_conv_layer.cpp | 10 ++--------
 src/caffe/layers/conv_layer.cpp      |  5 ++---
 src/caffe/layers/deconv_layer.cpp    |  5 ++---
 4 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index eae65820..06bc0457 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -58,6 +58,10 @@ class BaseConvolutionLayer : public Layer<Dtype> {
   void backward_gpu_bias(Dtype* bias, const Dtype* input);
 #endif
 
+  /// @brief The spatial dimensions of the input.
+  inline int input_shape(int i) {
+    return (*bottom_shape_)[channel_axis_ + i];
+  }
   // reverse_dimensions should return true iff we are implementing deconv, so
   // that conv helpers know which dimensions are which.
   virtual bool reverse_dimensions() = 0;
@@ -72,12 +76,11 @@
   Blob<int> pad_;
   /// @brief The spatial dimensions of the convolution input.
   Blob<int> conv_input_shape_;
-  /// @brief The spatial dimensions of the input.
-  Blob<int> input_shape_;
   /// @brief The spatial dimensions of the col_buffer.
   vector<int> col_buffer_shape_;
   /// @brief The spatial dimensions of the output.
   vector<int> output_shape_;
+  const vector<int>* bottom_shape_;
 
   int num_spatial_axes_;
   int bottom_dim_;
diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp
index a5b90a54..c6b47550 100644
--- a/src/caffe/layers/base_conv_layer.cpp
+++ b/src/caffe/layers/base_conv_layer.cpp
@@ -20,13 +20,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   const int num_axes = bottom[0]->num_axes();
   num_spatial_axes_ = num_axes - first_spatial_axis;
   CHECK_GE(num_spatial_axes_, 0);
-  // Setup input dimensions (input_shape_).
   vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
-  input_shape_.Reshape(bottom_dim_blob_shape);
-  int* input_shape_data = input_shape_.mutable_cpu_data();
-  for (int i = 0; i < num_spatial_axes_ + 1; ++i) {
-    input_shape_data[i] = bottom[0]->shape(channel_axis_ + i);
-  }
   vector<int> spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1));
   // Setup filter kernel dimensions (kernel_shape_).
   kernel_shape_.Reshape(spatial_dim_blob_shape);
@@ -190,6 +184,7 @@ void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
         << "All inputs must have the same shape.";
   }
   // Shape the tops.
+  bottom_shape_ = &bottom[0]->shape();
   compute_output_shape();
   vector<int> top_shape(bottom[0]->shape().begin(),
       bottom[0]->shape().begin() + channel_axis_);
@@ -223,10 +218,9 @@ void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     // it goes lazily unused to save memory.
     col_buffer_shape_.clear();
     col_buffer_shape_.push_back(kernel_dim_ * group_);
-    const int* input_shape_data = input_shape_.cpu_data() + 1;
     for (int i = 0; i < num_spatial_axes_; ++i) {
       if (reverse_dimensions()) {
-        col_buffer_shape_.push_back(input_shape_data[i]);
+        col_buffer_shape_.push_back(input_shape(i + 1));
       } else {
         col_buffer_shape_.push_back(output_shape_[i]);
       }
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 5cf26970..fb50bb09 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -10,14 +10,13 @@ namespace caffe {
 
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::compute_output_shape() {
-  // input_shape_ + 1 to skip channel axis
-  const int* input_shape_data = this->input_shape_.cpu_data() + 1;
   const int* kernel_shape_data = this->kernel_shape_.cpu_data();
   const int* stride_data = this->stride_.cpu_data();
   const int* pad_data = this->pad_.cpu_data();
   this->output_shape_.clear();
   for (int i = 0; i < this->num_spatial_axes_; ++i) {
-    const int input_dim = input_shape_data[i];
+    // i + 1 to skip channel axis
+    const int input_dim = this->input_shape(i + 1);
     const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
         / stride_data[i] + 1;
     this->output_shape_.push_back(output_dim);
diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp
index f1d1abf2..91aabb31 100644
--- a/src/caffe/layers/deconv_layer.cpp
+++ b/src/caffe/layers/deconv_layer.cpp
@@ -10,14 +10,13 @@ namespace caffe {
 
 template <typename Dtype>
 void DeconvolutionLayer<Dtype>::compute_output_shape() {
-  // input_shape_ + 1 to skip channel axis
-  const int* input_shape_data = this->input_shape_.cpu_data() + 1;
   const int* kernel_shape_data = this->kernel_shape_.cpu_data();
   const int* stride_data = this->stride_.cpu_data();
   const int* pad_data = this->pad_.cpu_data();
   this->output_shape_.clear();
   for (int i = 0; i < this->num_spatial_axes_; ++i) {
-    const int input_dim = input_shape_data[i];
+    // i + 1 to skip channel axis
+    const int input_dim = this->input_shape(i + 1);
     const int output_dim = stride_data[i] * (input_dim - 1)
         + kernel_shape_data[i] - 2 * pad_data[i];
     this->output_shape_.push_back(output_dim);
-- 
cgit v1.2.3
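
Note (not part of the patch): the core idea of the fix is that the layer no longer copies the bottom blob's shape once in LayerSetUp; instead Reshape() re-reads it each time by caching a pointer, and the new input_shape(i) accessor indexes into that cached shape. The following is a minimal standalone C++ sketch of that pattern under simplified, stand-in types; ToyBlob and ToyConvLayer are illustrative names, not Caffe classes, and the arithmetic mirrors the convolution output formula from the patch.

// Sketch: cache a pointer to the bottom shape at Reshape() time so that
// output sizes always reflect the current input, even after a reshape.
#include <cassert>
#include <vector>

struct ToyBlob {
  std::vector<int> shape;  // e.g. {N, C, H, W}
};

class ToyConvLayer {
 public:
  // Called before every forward pass, like BaseConvolutionLayer::Reshape.
  void Reshape(const ToyBlob& bottom) {
    bottom_shape_ = &bottom.shape;  // re-read the current shape; no copy
  }
  // Spatial input dimension; i = 1 is the first spatial axis, as in the patch.
  int input_shape(int i) const {
    return (*bottom_shape_)[channel_axis_ + i];
  }
  // Convolution output size along spatial axis i.
  int output_dim(int i, int kernel, int stride, int pad) const {
    return (input_shape(i) + 2 * pad - kernel) / stride + 1;
  }

 private:
  const std::vector<int>* bottom_shape_ = nullptr;
  int channel_axis_ = 1;  // NCHW layout
};

int main() {
  ToyBlob bottom{{1, 3, 224, 224}};
  ToyConvLayer conv;
  conv.Reshape(bottom);
  assert(conv.output_dim(1, /*kernel=*/3, /*stride=*/1, /*pad=*/1) == 224);

  // Change the input's spatial size; because the shape is read through the
  // cached pointer at Reshape() time, the new size is picked up automatically
  // instead of using a stale copy made at setup time.
  bottom.shape = {1, 3, 112, 112};
  conv.Reshape(bottom);
  assert(conv.output_dim(1, 3, 1, 1) == 112);
  return 0;
}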