path: root/src/caffe/layers/conv_layer.cu
author    Jonathan L Long <jonlong@cs.berkeley.edu>  2014-12-21 19:42:29 -0800
committer Jonathan L Long <jonlong@cs.berkeley.edu>  2015-01-11 00:28:44 -0800
commit    e3e2f2d3139880f77355e6837e72ad6c2848b448 (patch)
tree      0430b5a351a3f81f85912872392f1ceef89c1ac5 /src/caffe/layers/conv_layer.cu
parent    a0e9db1347c325ff007166e79d1ca693e2e5de18 (diff)
download  caffeonacl-e3e2f2d3139880f77355e6837e72ad6c2848b448.tar.gz / .tar.bz2 / .zip
rewrite ConvolutionLayer to use BaseConvolutionLayer helpers
Diffstat (limited to 'src/caffe/layers/conv_layer.cu')
-rw-r--r--  src/caffe/layers/conv_layer.cu  |  117
1 file changed, 23 insertions(+), 94 deletions(-)
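
The commit replaces ConvolutionLayer's inline im2col + grouped GEMM plumbing with calls to the BaseConvolutionLayer helpers (forward_gpu_gemm, forward_gpu_bias, weight_gpu_gemm, backward_gpu_gemm, backward_gpu_bias). As a rough sketch of what the forward-pass helpers do, reconstructed from the removed inline code in the diff below; member names such as col_buffer_, M_, K_, N_, and group_ are carried over from the old implementation and are assumptions about the helper internals, not the actual BaseConvolutionLayer source:

// Sketch only: reconstructed from the removed ConvolutionLayer code below.
// The real BaseConvolutionLayer helpers may differ in naming and in how the
// column buffer is managed.
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_gpu_gemm(const Dtype* input,
    const Dtype* weights, Dtype* output) {
  const Dtype* col_buff = input;  // 1x1 convolution: use the input directly
  if (!is_1x1_) {
    // Unroll input patches into a column matrix for the GEMM.
    im2col_gpu(input, channels_, height_, width_, kernel_h_, kernel_w_,
        pad_h_, pad_w_, stride_h_, stride_w_, col_buffer_.mutable_gpu_data());
    col_buff = col_buffer_.gpu_data();
  }
  // One (M_ x N_) = (M_ x K_) * (K_ x N_) product per group.
  for (int g = 0; g < group_; ++g) {
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
        (Dtype)1., weights + M_ * K_ * g, col_buff + K_ * N_ * g,
        (Dtype)0., output + M_ * N_ * g);
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_gpu_bias(Dtype* output,
    const Dtype* bias) {
  // Broadcast each output channel's bias across the N_ spatial positions.
  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_, N_, 1,
      (Dtype)1., bias, bias_multiplier_.gpu_data(), (Dtype)1., output);
}

With the plumbing factored into the base class, ConvolutionLayer::Forward_gpu shrinks to a per-image loop over these two calls, and the same helpers can presumably be shared by other convolution-style layers.
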
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
index af14facb..3902fdf3 100644
--- a/src/caffe/layers/conv_layer.cu
+++ b/src/caffe/layers/conv_layer.cu
@@ -8,135 +8,64 @@
namespace caffe {
-/// @brief refer to CPU forward -- the BLAS implementation is the same.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
+ const Dtype* weight = this->blobs_[0]->gpu_data();
for (int i = 0; i < bottom.size(); ++i) {
const Dtype* bottom_data = bottom[i]->gpu_data();
Dtype* top_data = top[i]->mutable_gpu_data();
- Dtype* col_buff = NULL;
- if (!is_1x1_) {
- col_buff = col_buffer_.mutable_gpu_data();
- }
- const Dtype* weight = this->blobs_[0]->gpu_data();
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- for (int n = 0; n < num_; ++n) {
- // im2col transformation: unroll input regions for filtering
- // into column matrix for multplication.
- if (!is_1x1_) {
- im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
- width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
- col_buff);
- } else {
- col_buff = bottom[i]->mutable_gpu_data() + bottom[i]->offset(n);
- }
- // Take inner products for groups.
- for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
- (Dtype)1., weight + weight_offset * g, col_buff + col_offset * g,
- (Dtype)0., top_data + top[i]->offset(n) + top_offset * g);
- }
- // Add bias.
- if (bias_term_) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
- N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
- bias_multiplier_.gpu_data(),
- (Dtype)1., top_data + top[i]->offset(n));
+ for (int n = 0; n < this->num_; ++n) {
+ this->forward_gpu_gemm(bottom_data + bottom[i]->offset(n), weight,
+ top_data + top[i]->offset(n));
+ if (this->bias_term_) {
+ const Dtype* bias = this->blobs_[1]->gpu_data();
+ this->forward_gpu_bias(top_data + top[i]->offset(n), bias);
}
}
}
}
-/// @brief refer to CPU backward -- the BLAS implementation is the same.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
- const Dtype* weight = NULL;
- Dtype* weight_diff = NULL;
+ const Dtype* weight = this->blobs_[0]->gpu_data();
+ Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
if (this->param_propagate_down_[0]) {
- weight = this->blobs_[0]->gpu_data();
- weight_diff = this->blobs_[0]->mutable_gpu_diff();
caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
}
- Dtype* bias_diff = NULL;
- if (bias_term_ && this->param_propagate_down_[1]) {
- bias_diff = this->blobs_[1]->mutable_gpu_diff();
- caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
+ if (this->bias_term_ && this->param_propagate_down_[1]) {
+ caffe_gpu_set(this->blobs_[1]->count(), Dtype(0),
+ this->blobs_[1]->mutable_gpu_diff());
}
- const int weight_offset = M_ * K_;
- const int col_offset = K_ * N_;
- const int top_offset = M_ * N_;
for (int i = 0; i < top.size(); ++i) {
- const Dtype* top_diff = NULL;
+ const Dtype* top_diff = top[i]->gpu_diff();
// Bias gradient, if necessary.
- if (bias_term_ && this->param_propagate_down_[1]) {
- top_diff = top[i]->gpu_diff();
- for (int n = 0; n < num_; ++n) {
- caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
- 1., top_diff + top[0]->offset(n),
- bias_multiplier_.gpu_data(), 1.,
- bias_diff);
+ if (this->bias_term_ && this->param_propagate_down_[1]) {
+ Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff();
+ for (int n = 0; n < this->num_; ++n) {
+ this->backward_gpu_bias(bias_diff, top_diff + top[i]->offset(n));
}
}
if (this->param_propagate_down_[0] || propagate_down[i]) {
- if (!top_diff) {
- top_diff = top[i]->gpu_diff();
- }
- Dtype* col_buff = NULL;
- if (!is_1x1_) {
- col_buff = col_buffer_.mutable_gpu_data();
- }
const Dtype* bottom_data = bottom[i]->gpu_data();
Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
- for (int n = 0; n < num_; ++n) {
- // Since we saved memory in the forward pass by not storing all col
- // data, we will need to recompute them.
- if (!is_1x1_) {
- im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
- width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
- stride_h_, stride_w_, col_buff);
- } else {
- col_buff = bottom[i]->mutable_gpu_data() + bottom[i]->offset(n);
- }
+ for (int n = 0; n < this->num_; ++n) {
// gradient w.r.t. weight. Note that we will accumulate diffs.
if (this->param_propagate_down_[0]) {
- for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
- (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
- col_buff + col_offset * g, (Dtype)1.,
- weight_diff + weight_offset * g);
- }
+ this->weight_gpu_gemm(bottom_data + bottom[i]->offset(n),
+ top_diff + top[i]->offset(n), weight_diff);
}
- // gradient w.r.t. bottom data, if necessary
+ // gradient w.r.t. bottom data, if necessary.
if (propagate_down[i]) {
- if (weight == NULL) {
- weight = this->blobs_[0]->gpu_data();
- }
- if (is_1x1_) {
- col_buff = bottom[i]->mutable_gpu_diff() + bottom[i]->offset(n);
- }
- for (int g = 0; g < group_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
- (Dtype)1., weight + weight_offset * g,
- top_diff + top[i]->offset(n) + top_offset * g,
- (Dtype)0., col_buff + col_offset * g);
- }
- // col2im back to the data
- if (!is_1x1_) {
- col2im_gpu(col_buff, channels_, height_, width_,
- kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
- bottom_diff + bottom[i]->offset(n));
- }
+ this->backward_gpu_gemm(top_diff + top[i]->offset(n), weight,
+ bottom_diff + bottom[i]->offset(n));
}
}
}
}
}
-
INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionLayer);
} // namespace caffe
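
For reference, a comparable sketch of the backward helpers called in the new Backward_gpu, again reconstructed from the removed inline code above rather than from BaseConvolutionLayer itself; member names and buffer handling are assumptions carried over from the old implementation:

// Sketch only: reconstructed from the removed backward code above; the real
// BaseConvolutionLayer helpers may differ in details.
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::backward_gpu_bias(Dtype* bias,
    const Dtype* input) {
  // Accumulate d(loss)/d(bias): sum the top diff over spatial positions.
  caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_, (Dtype)1., input,
      bias_multiplier_.gpu_data(), (Dtype)1., bias);
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::weight_gpu_gemm(const Dtype* input,
    const Dtype* output, Dtype* weights) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    // The column data was not kept after forward, so recompute it.
    im2col_gpu(input, channels_, height_, width_, kernel_h_, kernel_w_,
        pad_h_, pad_w_, stride_h_, stride_w_, col_buffer_.mutable_gpu_data());
    col_buff = col_buffer_.gpu_data();
  }
  // Accumulate d(loss)/d(weight) = top_diff * col_buff^T, per group.
  for (int g = 0; g < group_; ++g) {
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
        (Dtype)1., output + M_ * N_ * g, col_buff + K_ * N_ * g,
        (Dtype)1., weights + M_ * K_ * g);
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
    const Dtype* weights, Dtype* input) {
  Dtype* col_buff = col_buffer_.mutable_gpu_data();
  if (is_1x1_) {
    col_buff = input;  // no unrolling needed; write the diff in place
  }
  // d(loss)/d(col_buff) = weight^T * top_diff, per group.
  for (int g = 0; g < group_; ++g) {
    caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
        (Dtype)1., weights + M_ * K_ * g, output + M_ * N_ * g,
        (Dtype)0., col_buff + K_ * N_ * g);
  }
  if (!is_1x1_) {
    // Fold the column diff back into the image layout of bottom_diff.
    col2im_gpu(col_buff, channels_, height_, width_, kernel_h_, kernel_w_,
        pad_h_, pad_w_, stride_h_, stride_w_, input);
  }
}

As in the old code, the column buffer is recomputed during backward rather than cached from forward, trading a repeated im2col for lower memory use.
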