diff options
author | Jonathan L Long <jonlong@cs.berkeley.edu> | 2014-12-21 19:42:29 -0800 |
---|---|---|
committer | Jonathan L Long <jonlong@cs.berkeley.edu> | 2015-01-11 00:28:44 -0800 |
commit | e3e2f2d3139880f77355e6837e72ad6c2848b448 (patch) | |
tree | 0430b5a351a3f81f85912872392f1ceef89c1ac5 /src/caffe/layers/conv_layer.cu | |
parent | a0e9db1347c325ff007166e79d1ca693e2e5de18 (diff) | |
download | caffeonacl-e3e2f2d3139880f77355e6837e72ad6c2848b448.tar.gz caffeonacl-e3e2f2d3139880f77355e6837e72ad6c2848b448.tar.bz2 caffeonacl-e3e2f2d3139880f77355e6837e72ad6c2848b448.zip |
rewrite ConvolutionLayer to use BaseConvolutionLayer helpers
Diffstat (limited to 'src/caffe/layers/conv_layer.cu')
-rw-r--r-- | src/caffe/layers/conv_layer.cu | 117 |
1 files changed, 23 insertions, 94 deletions
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index af14facb..3902fdf3 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -8,135 +8,64 @@ namespace caffe { -/// @brief refer to CPU forward -- the BLAS implementation is the same. template <typename Dtype> void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { + const Dtype* weight = this->blobs_[0]->gpu_data(); for (int i = 0; i < bottom.size(); ++i) { const Dtype* bottom_data = bottom[i]->gpu_data(); Dtype* top_data = top[i]->mutable_gpu_data(); - Dtype* col_buff = NULL; - if (!is_1x1_) { - col_buff = col_buffer_.mutable_gpu_data(); - } - const Dtype* weight = this->blobs_[0]->gpu_data(); - int weight_offset = M_ * K_; - int col_offset = K_ * N_; - int top_offset = M_ * N_; - for (int n = 0; n < num_; ++n) { - // im2col transformation: unroll input regions for filtering - // into column matrix for multplication. - if (!is_1x1_) { - im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_, - width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, - col_buff); - } else { - col_buff = bottom[i]->mutable_gpu_data() + bottom[i]->offset(n); - } - // Take inner products for groups. - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_, - (Dtype)1., weight + weight_offset * g, col_buff + col_offset * g, - (Dtype)0., top_data + top[i]->offset(n) + top_offset * g); - } - // Add bias. - if (bias_term_) { - caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_, - N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(), - bias_multiplier_.gpu_data(), - (Dtype)1., top_data + top[i]->offset(n)); + for (int n = 0; n < this->num_; ++n) { + this->forward_gpu_gemm(bottom_data + bottom[i]->offset(n), weight, + top_data + top[i]->offset(n)); + if (this->bias_term_) { + const Dtype* bias = this->blobs_[1]->gpu_data(); + this->forward_gpu_bias(top_data + top[i]->offset(n), bias); } } } } -/// @brief refer to CPU backward -- the BLAS implementation is the same. template <typename Dtype> void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { - const Dtype* weight = NULL; - Dtype* weight_diff = NULL; + const Dtype* weight = this->blobs_[0]->gpu_data(); + Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); if (this->param_propagate_down_[0]) { - weight = this->blobs_[0]->gpu_data(); - weight_diff = this->blobs_[0]->mutable_gpu_diff(); caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff); } - Dtype* bias_diff = NULL; - if (bias_term_ && this->param_propagate_down_[1]) { - bias_diff = this->blobs_[1]->mutable_gpu_diff(); - caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff); + if (this->bias_term_ && this->param_propagate_down_[1]) { + caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), + this->blobs_[1]->mutable_gpu_diff()); } - const int weight_offset = M_ * K_; - const int col_offset = K_ * N_; - const int top_offset = M_ * N_; for (int i = 0; i < top.size(); ++i) { - const Dtype* top_diff = NULL; + const Dtype* top_diff = top[i]->gpu_diff(); // Bias gradient, if necessary. - if (bias_term_ && this->param_propagate_down_[1]) { - top_diff = top[i]->gpu_diff(); - for (int n = 0; n < num_; ++n) { - caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_, - 1., top_diff + top[0]->offset(n), - bias_multiplier_.gpu_data(), 1., - bias_diff); + if (this->bias_term_ && this->param_propagate_down_[1]) { + Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); + for (int n = 0; n < this->num_; ++n) { + this->backward_gpu_bias(bias_diff, top_diff + top[i]->offset(n)); } } if (this->param_propagate_down_[0] || propagate_down[i]) { - if (!top_diff) { - top_diff = top[i]->gpu_diff(); - } - Dtype* col_buff = NULL; - if (!is_1x1_) { - col_buff = col_buffer_.mutable_gpu_data(); - } const Dtype* bottom_data = bottom[i]->gpu_data(); Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); - for (int n = 0; n < num_; ++n) { - // Since we saved memory in the forward pass by not storing all col - // data, we will need to recompute them. - if (!is_1x1_) { - im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_, - width_, kernel_h_, kernel_w_, pad_h_, pad_w_, - stride_h_, stride_w_, col_buff); - } else { - col_buff = bottom[i]->mutable_gpu_data() + bottom[i]->offset(n); - } + for (int n = 0; n < this->num_; ++n) { // gradient w.r.t. weight. Note that we will accumulate diffs. if (this->param_propagate_down_[0]) { - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, - (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g, - col_buff + col_offset * g, (Dtype)1., - weight_diff + weight_offset * g); - } + this->weight_gpu_gemm(bottom_data + bottom[i]->offset(n), + top_diff + top[i]->offset(n), weight_diff); } - // gradient w.r.t. bottom data, if necessary + // gradient w.r.t. bottom data, if necessary. if (propagate_down[i]) { - if (weight == NULL) { - weight = this->blobs_[0]->gpu_data(); - } - if (is_1x1_) { - col_buff = bottom[i]->mutable_gpu_diff() + bottom[i]->offset(n); - } - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, - (Dtype)1., weight + weight_offset * g, - top_diff + top[i]->offset(n) + top_offset * g, - (Dtype)0., col_buff + col_offset * g); - } - // col2im back to the data - if (!is_1x1_) { - col2im_gpu(col_buff, channels_, height_, width_, - kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, - bottom_diff + bottom[i]->offset(n)); - } + this->backward_gpu_gemm(top_diff + top[i]->offset(n), weight, + bottom_diff + bottom[i]->offset(n)); } } } } } - INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionLayer); } // namespace caffe |