ConvolutionLayer can take N bottom blobs and N top blobs

author: Jeff Donahue <jeff.donahue@gmail.com> 2014-07-03 15:33:12 -0700
committer: Jeff Donahue <jeff.donahue@gmail.com> 2014-07-03 15:33:12 -0700
commit: a2b287472d7d85f997d2621bc4a2486c3837f6ba (patch)
tree: 70d4b71526464ad235aa7e2817f4006f14138f74 /src/caffe/layers/conv_layer.cu
parent: 872e6c5bc794c8535f4c6d7211f12e8c597dabf7 (diff)
download: caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.tar.gz
caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.tar.bz2
caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.zip
1 file changed, 64 insertions, 60 deletions
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
index 85f95fd3..71b00c95 100644
--- a/src/caffe/layers/conv_layer.cu
+++ b/src/caffe/layers/conv_layer.cu
@@ -13,29 +13,31 @@ namespace caffe {
 template <typename Dtype>
 Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top) {
-  const Dtype* bottom_data = bottom[0]->gpu_data();
-  Dtype* top_data = (*top)[0]->mutable_gpu_data();
-  Dtype* col_data = col_buffer_.mutable_gpu_data();
-  const Dtype* weight = this->blobs_[0]->gpu_data();
-  int weight_offset = M_ * K_;
-  int col_offset = K_ * N_;
-  int top_offset = M_ * N_;
-  for (int n = 0; n < num_; ++n) {
-    // First, im2col
-    im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_,
-                      width_, kernel_size_, pad_, stride_, col_data);
-    // Second, innerproduct with groups
-    for (int g = 0; g < group_; ++g) {
-      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
-        (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
-        (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
-    }
-    // third, add bias
-    if (bias_term_) {
-      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
-          N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
-          reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
-          (Dtype)1., top_data + (*top)[0]->offset(n));
+  for (int i = 0; i < bottom.size(); ++i) {
+    const Dtype* bottom_data = bottom[i]->gpu_data();
+    Dtype* top_data = (*top)[i]->mutable_gpu_data();
+    Dtype* col_data = col_buffer_.mutable_gpu_data();
+    const Dtype* weight = this->blobs_[0]->gpu_data();
+    int weight_offset = M_ * K_;
+    int col_offset = K_ * N_;
+    int top_offset = M_ * N_;
+    for (int n = 0; n < num_; ++n) {
+      // First, im2col
+      im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
+                        width_, kernel_size_, pad_, stride_, col_data);
+      // Second, innerproduct with groups
+      for (int g = 0; g < group_; ++g) {
+        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+          (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+          (Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
+      }
+      // third, add bias
+      if (bias_term_) {
+        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
+            N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
+            reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+            (Dtype)1., top_data + (*top)[i]->offset(n));
+      }
     }
   }
   return Dtype(0.);
@@ -44,56 +46,58 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  const Dtype* top_diff = top[0]->gpu_diff();
   const Dtype* weight = this->blobs_[0]->gpu_data();
   Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
-  const Dtype* bottom_data = (*bottom)[0]->gpu_data();
-  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+  CUDA_CHECK(cudaMemset(weight_diff, 0,
+      sizeof(Dtype) * this->blobs_[0]->count()));
   Dtype* col_data = col_buffer_.mutable_gpu_data();
   Dtype* col_diff = col_buffer_.mutable_gpu_diff();
-  // bias gradient if necessary
   Dtype* bias_diff = NULL;
-
   if (bias_term_) {
     bias_diff = this->blobs_[1]->mutable_gpu_diff();
     CUDA_CHECK(cudaMemset(bias_diff, 0,
         sizeof(Dtype) * this->blobs_[1]->count()));
-    for (int n = 0; n < num_; ++n) {
-      caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
-          1., top_diff + top[0]->offset(n),
-          reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
-          1., bias_diff);
-    }
   }
-
-  int weight_offset = M_ * K_;
-  int col_offset = K_ * N_;
-  int top_offset = M_ * N_;
-  CUDA_CHECK(cudaMemset(weight_diff, 0,
-      sizeof(Dtype) * this->blobs_[0]->count()));
-  for (int n = 0; n < num_; ++n) {
-    // since we saved memory in the forward pass by not storing all col data,
-    // we will need to recompute them.
-    im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_,
-                      width_, kernel_size_, pad_, stride_, col_data);
-    // gradient w.r.t. weight. Note that we will accumulate diffs.
-    for (int g = 0; g < group_; ++g) {
-      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-        (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
-        col_data + col_offset * g, (Dtype)1.,
-        weight_diff + weight_offset * g);
+  const int weight_offset = M_ * K_;
+  const int col_offset = K_ * N_;
+  const int top_offset = M_ * N_;
+  for (int i = 0; i < top.size(); ++i) {
+    const Dtype* top_diff = top[i]->gpu_diff();
+    const Dtype* bottom_data = (*bottom)[i]->gpu_data();
+    Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
+    // Bias gradient, if necessary; accumulated into bias_diff per top blob.
+    if (bias_term_) {
+      for (int n = 0; n < num_; ++n) {
+        caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
+            1., top_diff + top[i]->offset(n),
+            static_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+            1., bias_diff);
+      }
     }
-    // gradient w.r.t. bottom data, if necessary
-    if (propagate_down[0]) {
+    for (int n = 0; n < num_; ++n) {
+      // since we saved memory in the forward pass by not storing all col data,
+      // we will need to recompute them.
+      im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+                        width_, kernel_size_, pad_, stride_, col_data);
+      // gradient w.r.t. weight. Note that we will accumulate diffs.
       for (int g = 0; g < group_; ++g) {
-        caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-          (Dtype)1., weight + weight_offset * g,
-          top_diff + top[0]->offset(n) + top_offset * g,
-          (Dtype)0., col_diff + col_offset * g);
+        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+          (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+          col_data + col_offset * g, (Dtype)1.,
+          weight_diff + weight_offset * g);
+      }
+      // gradient w.r.t. bottom data, if necessary
+      if (propagate_down[i]) {
+        for (int g = 0; g < group_; ++g) {
+          caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+            (Dtype)1., weight + weight_offset * g,
+            top_diff + top[i]->offset(n) + top_offset * g,
+            (Dtype)0., col_diff + col_offset * g);
+        }
+        // col2im back to the data
+        col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+            stride_, bottom_diff + (*bottom)[i]->offset(n));
       }
-      // col2im back to the data
-      col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
-          stride_, bottom_diff + (*bottom)[0]->offset(n));
     }
   }
 }
author	Jeff Donahue <jeff.donahue@gmail.com>	2014-07-03 15:33:12 -0700
committer	Jeff Donahue <jeff.donahue@gmail.com>	2014-07-03 15:33:12 -0700
commit	a2b287472d7d85f997d2621bc4a2486c3837f6ba (patch)
tree	70d4b71526464ad235aa7e2817f4006f14138f74 /src/caffe/layers/conv_layer.cu
parent	872e6c5bc794c8535f4c6d7211f12e8c597dabf7 (diff)
download	caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.tar.gz caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.tar.bz2 caffeonacl-a2b287472d7d85f997d2621bc4a2486c3837f6ba.zip