summary refs log tree commit diff
path: root/src/caffe/layers/conv_layer.cu
diff options
context:
space:
mode:
author Jeff Donahue <jeff.donahue@gmail.com> 2014-07-03 15:33:12 -0700
committer Jeff Donahue <jeff.donahue@gmail.com> 2014-07-03 15:33:12 -0700
commit a2b287472d7d85f997d2621bc4a2486c3837f6ba (patch)
tree 70d4b71526464ad235aa7e2817f4006f14138f74 /src/caffe/layers/conv_layer.cu
parent 872e6c5bc794c8535f4c6d7211f12e8c597dabf7 (diff)
download caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.tar.gz
caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.tar.bz2
caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.zip
ConvolutionLayer can take N bottom blobs and N top blobs
Diffstat (limited to 'src/caffe/layers/conv_layer.cu')
-rw-r--r-- src/caffe/layers/conv_layer.cu 124
1 files changed, 64 insertions, 60 deletions
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
index 85f95fd3..71b00c95 100644
--- a/src/caffe/layers/conv_layer.cu
+++ b/src/caffe/layers/conv_layer.cu
@@ -13,29 +13,31 @@ namespace caffe {
template <typename Dtype>
Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->gpu_data();
- Dtype* top_data = (*top)[0]->mutable_gpu_data();
- Dtype* col_data = col_buffer_.mutable_gpu_data();
- const Dtype* weight = this->blobs_[0]->gpu_data();
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- for (int n = 0; n < num_; ++n) {
- // First, im2col
- im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_,
- width_, kernel_size_, pad_, stride_, col_data);
- // Second, innerproduct with groups
- for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
- (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
- (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
- }
- // third, add bias
- if (bias_term_) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
- N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
- reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
- (Dtype)1., top_data + (*top)[0]->offset(n));
+ for (int i = 0; i < bottom.size(); ++i) {
+ const Dtype* bottom_data = bottom[i]->gpu_data();
+ Dtype* top_data = (*top)[i]->mutable_gpu_data();
+ Dtype* col_data = col_buffer_.mutable_gpu_data();
+ const Dtype* weight = this->blobs_[0]->gpu_data();
+ int weight_offset = M_ * K_;
+ int col_offset = K_ * N_;
+ int top_offset = M_ * N_;
+ for (int n = 0; n < num_; ++n) {
+ // First, im2col
+ im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
+ width_, kernel_size_, pad_, stride_, col_data);
+ // Second, innerproduct with groups
+ for (int g = 0; g < group_; ++g) {
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+ (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+ (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
+ }
+ // third, add bias
+ if (bias_term_) {
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
+ N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
+ reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+ (Dtype)1., top_data + (*top)[i]->offset(n));
+ }
}
}
return Dtype(0.);
@@ -44,56 +46,58 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
- const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* weight = this->blobs_[0]->gpu_data();
Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
- const Dtype* bottom_data = (*bottom)[0]->gpu_data();
- Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+ CUDA_CHECK(cudaMemset(weight_diff, 0,
+ sizeof(Dtype) * this->blobs_[0]->count()));
Dtype* col_data = col_buffer_.mutable_gpu_data();
Dtype* col_diff = col_buffer_.mutable_gpu_diff();
- // bias gradient if necessary
Dtype* bias_diff = NULL;
-
if (bias_term_) {
bias_diff = this->blobs_[1]->mutable_gpu_diff();
CUDA_CHECK(cudaMemset(bias_diff, 0,
sizeof(Dtype) * this->blobs_[1]->count()));
- for (int n = 0; n < num_; ++n) {
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
- 1., top_diff + top[0]->offset(n),
- reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
- 1., bias_diff);
- }
}
-
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- CUDA_CHECK(cudaMemset(weight_diff, 0,
- sizeof(Dtype) * this->blobs_[0]->count()));
- for (int n = 0; n < num_; ++n) {
- // since we saved memory in the forward pass by not storing all col data,
- // we will need to recompute them.
- im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_,
- width_, kernel_size_, pad_, stride_, col_data);
- // gradient w.r.t. weight. Note that we will accumulate diffs.
- for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
- (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
- col_data + col_offset * g, (Dtype)1.,
- weight_diff + weight_offset * g);
+ const int weight_offset = M_ * K_;
+ const int col_offset = K_ * N_;
+ const int top_offset = M_ * N_;
+ for (int i = 0; i < top.size(); ++i) {
+ const Dtype* top_diff = top[i]->gpu_diff();
+ const Dtype* bottom_data = (*bottom)[i]->gpu_data();
+ Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
+ // Bias gradient, if necessary; accumulated into bias_diff over every top blob.
+ if (bias_term_) {
+ for (int n = 0; n < num_; ++n) {
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
+ 1., top_diff + top[i]->offset(n),
+ static_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+ 1., bias_diff);
+ }
}
- // gradient w.r.t. bottom data, if necessary
- if (propagate_down[0]) {
+ for (int n = 0; n < num_; ++n) {
+ // since we saved memory in the forward pass by not storing all col data,
+ // we will need to recompute them.
+ im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+ width_, kernel_size_, pad_, stride_, col_data);
+ // gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
- (Dtype)1., weight + weight_offset * g,
- top_diff + top[0]->offset(n) + top_offset * g,
- (Dtype)0., col_diff + col_offset * g);
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+ (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+ col_data + col_offset * g, (Dtype)1.,
+ weight_diff + weight_offset * g);
+ }
+ // gradient w.r.t. bottom data, if necessary
+ if (propagate_down[i]) {
+ for (int g = 0; g < group_; ++g) {
+ caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+ (Dtype)1., weight + weight_offset * g,
+ top_diff + top[i]->offset(n) + top_offset * g,
+ (Dtype)0., col_diff + col_offset * g);
+ }
+ // col2im back to the data
+ col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+ stride_, bottom_diff + (*bottom)[i]->offset(n));
}
- // col2im back to the data
- col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
- stride_, bottom_diff + (*bottom)[0]->offset(n));
}
}
}