summary | refs | log | tree | commit | diff
path: root/src/caffe/layers/cudnn_conv_layer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/caffe/layers/cudnn_conv_layer.cpp')
-rw-r--r-- src/caffe/layers/cudnn_conv_layer.cpp | 46
1 files changed, 27 insertions, 19 deletions
diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp
index 104d2b9d..3514fe2a 100644
--- a/src/caffe/layers/cudnn_conv_layer.cpp
+++ b/src/caffe/layers/cudnn_conv_layer.cpp
@@ -34,14 +34,15 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
}
// Set the indexing parameters.
- weight_offset_ = (this->num_output_ / this->group_)
- * (this->channels_ / this->group_) * this->kernel_h_ * this->kernel_w_;
bias_offset_ = (this->num_output_ / this->group_);
// Create filter descriptor.
+ const int* kernel_shape_data = this->kernel_shape_.cpu_data();
+ const int kernel_h = kernel_shape_data[0];
+ const int kernel_w = kernel_shape_data[1];
cudnn::createFilterDesc<Dtype>(&filter_desc_,
this->num_output_ / this->group_, this->channels_ / this->group_,
- this->kernel_h_, this->kernel_w_);
+ kernel_h, kernel_w);
// Create tensor descriptor(s) for data and corresponding convolution(s).
for (int i = 0; i < bottom.size(); i++) {
@@ -68,29 +69,36 @@ template <typename Dtype>
void CuDNNConvolutionLayer<Dtype>::Reshape(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
ConvolutionLayer<Dtype>::Reshape(bottom, top);
- bottom_offset_ = (this->channels_ / this->group_)
- * this->height_ * this->width_;
- top_offset_ = (this->num_output_ / this->group_)
- * this->height_out_ * this->width_out_;
+ CHECK_EQ(2, this->num_spatial_axes_)
+ << "CuDNNConvolution input must have 2 spatial axes "
+ << "(e.g., height and width). "
+ << "Use 'engine: CAFFE' for general ND convolution.";
+ bottom_offset_ = this->bottom_dim_ / this->group_;
+ top_offset_ = this->top_dim_ / this->group_;
+ const int height = bottom[0]->shape(this->channel_axis_ + 1);
+ const int width = bottom[0]->shape(this->channel_axis_ + 2);
+ const int height_out = top[0]->shape(this->channel_axis_ + 1);
+ const int width_out = top[0]->shape(this->channel_axis_ + 2);
+ const int* pad_data = this->pad_.cpu_data();
+ const int pad_h = pad_data[0];
+ const int pad_w = pad_data[1];
+ const int* stride_data = this->stride_.cpu_data();
+ const int stride_h = stride_data[0];
+ const int stride_w = stride_data[1];
for (int i = 0; i < bottom.size(); i++) {
cudnn::setTensor4dDesc<Dtype>(&bottom_descs_[i],
this->num_,
- this->channels_ / this->group_,
- this->height_, this->width_,
- this->channels_ * this->height_ * this->width_,
- this->height_ * this->width_,
- this->width_, 1);
+ this->channels_ / this->group_, height, width,
+ this->channels_ * height * width,
+ height * width, width, 1);
cudnn::setTensor4dDesc<Dtype>(&top_descs_[i],
this->num_,
- this->num_output_ / this->group_,
- this->height_out_, this->width_out_,
- this->num_output_ * this->height_out_ * this->width_out_,
- this->height_out_ * this->width_out_,
- this->width_out_, 1);
+ this->num_output_ / this->group_, height_out, width_out,
+ this->num_output_ * this->out_spatial_dim_,
+ this->out_spatial_dim_, width_out, 1);
cudnn::setConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i],
- filter_desc_, this->pad_h_, this->pad_w_,
- this->stride_h_, this->stride_w_);
+ filter_desc_, pad_h, pad_w, stride_h, stride_w);
}
// Tensor descriptor for bias.