diff options
Diffstat (limited to 'src/caffe/layers/cudnn_conv_layer.cpp')
-rw-r--r-- | src/caffe/layers/cudnn_conv_layer.cpp | 46 |
1 files changed, 27 insertions, 19 deletions
diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index 104d2b9d..3514fe2a 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -34,14 +34,15 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp( } // Set the indexing parameters. - weight_offset_ = (this->num_output_ / this->group_) - * (this->channels_ / this->group_) * this->kernel_h_ * this->kernel_w_; bias_offset_ = (this->num_output_ / this->group_); // Create filter descriptor. + const int* kernel_shape_data = this->kernel_shape_.cpu_data(); + const int kernel_h = kernel_shape_data[0]; + const int kernel_w = kernel_shape_data[1]; cudnn::createFilterDesc<Dtype>(&filter_desc_, this->num_output_ / this->group_, this->channels_ / this->group_, - this->kernel_h_, this->kernel_w_); + kernel_h, kernel_w); // Create tensor descriptor(s) for data and corresponding convolution(s). for (int i = 0; i < bottom.size(); i++) { @@ -68,29 +69,36 @@ template <typename Dtype> void CuDNNConvolutionLayer<Dtype>::Reshape( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { ConvolutionLayer<Dtype>::Reshape(bottom, top); - bottom_offset_ = (this->channels_ / this->group_) - * this->height_ * this->width_; - top_offset_ = (this->num_output_ / this->group_) - * this->height_out_ * this->width_out_; + CHECK_EQ(2, this->num_spatial_axes_) + << "CuDNNConvolution input must have 2 spatial axes " + << "(e.g., height and width). " + << "Use 'engine: CAFFE' for general ND convolution."; + bottom_offset_ = this->bottom_dim_ / this->group_; + top_offset_ = this->top_dim_ / this->group_; + const int height = bottom[0]->shape(this->channel_axis_ + 1); + const int width = bottom[0]->shape(this->channel_axis_ + 2); + const int height_out = top[0]->shape(this->channel_axis_ + 1); + const int width_out = top[0]->shape(this->channel_axis_ + 2); + const int* pad_data = this->pad_.cpu_data(); + const int pad_h = pad_data[0]; + const int pad_w = pad_data[1]; + const int* stride_data = this->stride_.cpu_data(); + const int stride_h = stride_data[0]; + const int stride_w = stride_data[1]; for (int i = 0; i < bottom.size(); i++) { cudnn::setTensor4dDesc<Dtype>(&bottom_descs_[i], this->num_, - this->channels_ / this->group_, - this->height_, this->width_, - this->channels_ * this->height_ * this->width_, - this->height_ * this->width_, - this->width_, 1); + this->channels_ / this->group_, height, width, + this->channels_ * height * width, + height * width, width, 1); cudnn::setTensor4dDesc<Dtype>(&top_descs_[i], this->num_, - this->num_output_ / this->group_, - this->height_out_, this->width_out_, - this->num_output_ * this->height_out_ * this->width_out_, - this->height_out_ * this->width_out_, - this->width_out_, 1); + this->num_output_ / this->group_, height_out, width_out, + this->num_output_ * this->out_spatial_dim_, + this->out_spatial_dim_, width_out, 1); cudnn::setConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i], - filter_desc_, this->pad_h_, this->pad_w_, - this->stride_h_, this->stride_w_); + filter_desc_, pad_h, pad_w, stride_h, stride_w); } // Tensor descriptor for bias. |