diff options
author | Jeff Donahue <jeff.donahue@gmail.com> | 2014-07-03 15:33:12 -0700 |
---|---|---|
committer | Jeff Donahue <jeff.donahue@gmail.com> | 2014-07-03 15:33:12 -0700 |
commit | a2b287472d7d85f997d2621bc4a2486c3837f6ba (patch) | |
tree | 70d4b71526464ad235aa7e2817f4006f14138f74 /src/caffe/layers/conv_layer.cu | |
parent | 872e6c5bc794c8535f4c6d7211f12e8c597dabf7 (diff) | |
download | caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.tar.gz caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.tar.bz2 caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.zip |
ConvolutionLayer can take N bottom blobs and N top blobs
Diffstat (limited to 'src/caffe/layers/conv_layer.cu')
-rw-r--r-- | src/caffe/layers/conv_layer.cu | 124 |
1 file changed, 64 insertions, 60 deletions
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index 85f95fd3..71b00c95 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -13,29 +13,31 @@ namespace caffe { template <typename Dtype> Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = (*top)[0]->mutable_gpu_data(); - Dtype* col_data = col_buffer_.mutable_gpu_data(); - const Dtype* weight = this->blobs_[0]->gpu_data(); - int weight_offset = M_ * K_; - int col_offset = K_ * N_; - int top_offset = M_ * N_; - for (int n = 0; n < num_; ++n) { - // First, im2col - im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, - width_, kernel_size_, pad_, stride_, col_data); - // Second, innerproduct with groups - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_, - (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, - (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); - } - // third, add bias - if (bias_term_) { - caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_, - N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(), - reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()), - (Dtype)1., top_data + (*top)[0]->offset(n)); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->gpu_data(); + Dtype* top_data = (*top)[i]->mutable_gpu_data(); + Dtype* col_data = col_buffer_.mutable_gpu_data(); + const Dtype* weight = this->blobs_[0]->gpu_data(); + int weight_offset = M_ * K_; + int col_offset = K_ * N_; + int top_offset = M_ * N_; + for (int n = 0; n < num_; ++n) { + // First, im2col + im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); + // Second, innerproduct with groups + for (int g = 0; g < group_; ++g) { + caffe_gpu_gemm<Dtype>(CblasNoTrans, 
CblasNoTrans, M_, N_, K_, + (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, + (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g); + } + // third, add bias + if (bias_term_) { + caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_, + N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(), + reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()), + (Dtype)1., top_data + (*top)[i]->offset(n)); + } } } return Dtype(0.); @@ -44,56 +46,58 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, template <typename Dtype> void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) { - const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* weight = this->blobs_[0]->gpu_data(); Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); - const Dtype* bottom_data = (*bottom)[0]->gpu_data(); - Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); + CUDA_CHECK(cudaMemset(weight_diff, 0, + sizeof(Dtype) * this->blobs_[0]->count())); Dtype* col_data = col_buffer_.mutable_gpu_data(); Dtype* col_diff = col_buffer_.mutable_gpu_diff(); - // bias gradient if necessary Dtype* bias_diff = NULL; - if (bias_term_) { bias_diff = this->blobs_[1]->mutable_gpu_diff(); CUDA_CHECK(cudaMemset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count())); - for (int n = 0; n < num_; ++n) { - caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_, - 1., top_diff + top[0]->offset(n), - reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()), - 1., bias_diff); - } } - - int weight_offset = M_ * K_; - int col_offset = K_ * N_; - int top_offset = M_ * N_; - CUDA_CHECK(cudaMemset(weight_diff, 0, - sizeof(Dtype) * this->blobs_[0]->count())); - for (int n = 0; n < num_; ++n) { - // since we saved memory in the forward pass by not storing all col data, - // we will need to recompute them. 
- im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_, - width_, kernel_size_, pad_, stride_, col_data); - // gradient w.r.t. weight. Note that we will accumulate diffs. - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, - (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, - col_data + col_offset * g, (Dtype)1., - weight_diff + weight_offset * g); + const int weight_offset = M_ * K_; + const int col_offset = K_ * N_; + const int top_offset = M_ * N_; + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->gpu_diff(); + const Dtype* bottom_data = (*bottom)[i]->gpu_data(); + Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff(); + // Bias gradient, if necessary. + if (bias_term_) { + for (int n = 0; n < num_; ++n) { + caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_, + 1., top_diff + top[0]->offset(n), + static_cast<const Dtype*>(bias_multiplier_->gpu_data()), + 1., bias_diff); + } } - // gradient w.r.t. bottom data, if necessary - if (propagate_down[0]) { + for (int n = 0; n < num_; ++n) { + // since we saved memory in the forward pass by not storing all col data, + // we will need to recompute them. + im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); + // gradient w.r.t. weight. Note that we will accumulate diffs. for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, - (Dtype)1., weight + weight_offset * g, - top_diff + top[0]->offset(n) + top_offset * g, - (Dtype)0., col_diff + col_offset * g); + caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, + (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g, + col_data + col_offset * g, (Dtype)1., + weight_diff + weight_offset * g); + } + // gradient w.r.t. 
bottom data, if necessary + if (propagate_down[i]) { + for (int g = 0; g < group_; ++g) { + caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, + (Dtype)1., weight + weight_offset * g, + top_diff + top[i]->offset(n) + top_offset * g, + (Dtype)0., col_diff + col_offset * g); + } + // col2im back to the data + col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_, + stride_, bottom_diff + (*bottom)[i]->offset(n)); } - // col2im back to the data - col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_, - stride_, bottom_diff + (*bottom)[0]->offset(n)); } } } |