path: root/src/caffe/layers
Diffstat (limited to 'src/caffe/layers')
-rw-r--r--  src/caffe/layers/bnll_layer.cpp                48
-rw-r--r--  src/caffe/layers/bnll_layer.cu                 31
-rw-r--r--  src/caffe/layers/conv_layer.cpp                88
-rw-r--r--  src/caffe/layers/conv_layer.cu                104
-rw-r--r--  src/caffe/layers/data_layer.cpp                23
-rw-r--r--  src/caffe/layers/data_layer.cu                 44
-rw-r--r--  src/caffe/layers/dropout_layer.cpp             63
-rw-r--r--  src/caffe/layers/dropout_layer.cu              48
-rw-r--r--  src/caffe/layers/flatten_layer.cpp             18
-rw-r--r--  src/caffe/layers/flatten_layer.cu              30
-rw-r--r--  src/caffe/layers/hdf5_data_layer.cpp           28
-rw-r--r--  src/caffe/layers/hdf5_data_layer.cu            53
-rw-r--r--  src/caffe/layers/im2col_layer.cpp              24
-rw-r--r--  src/caffe/layers/im2col_layer.cu               38
-rw-r--r--  src/caffe/layers/inner_product_layer.cpp       40
-rw-r--r--  src/caffe/layers/inner_product_layer.cu        59
-rw-r--r--  src/caffe/layers/loss_layer.cpp (renamed from src/caffe/layers/loss_layer.cu)    6
-rw-r--r--  src/caffe/layers/relu_layer.cpp                42
-rw-r--r--  src/caffe/layers/relu_layer.cu                 27
-rw-r--r--  src/caffe/layers/sigmoid_layer.cpp             46
-rw-r--r--  src/caffe/layers/sigmoid_layer.cu              35
-rw-r--r--  src/caffe/layers/softmax_layer.cpp             86
-rw-r--r--  src/caffe/layers/softmax_layer.cu              71
-rw-r--r--  src/caffe/layers/softmax_loss_layer.cpp        59
-rw-r--r--  src/caffe/layers/softmax_loss_layer.cu         40
-rw-r--r--  src/caffe/layers/split_layer.cpp               34
-rw-r--r--  src/caffe/layers/split_layer.cu                48
-rw-r--r--  src/caffe/layers/tanh_layer.cpp                48
-rw-r--r--  src/caffe/layers/tanh_layer.cu                 33
29 files changed, 771 insertions(+), 543 deletions(-)
diff --git a/src/caffe/layers/bnll_layer.cpp b/src/caffe/layers/bnll_layer.cpp
new file mode 100644
index 00000000..ab0e0f09
--- /dev/null
+++ b/src/caffe/layers/bnll_layer.cpp
@@ -0,0 +1,48 @@
+// Copyright 2013 Yangqing Jia
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include <algorithm>
+
+using std::min;
+
+namespace caffe {
+
+const float kBNLL_THRESHOLD = 50.;
+
+template <typename Dtype>
+void BNLLLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = (*top)[0]->mutable_cpu_data();
+ const int count = bottom[0]->count();
+ for (int i = 0; i < count; ++i) {
+ top_data[i] = bottom_data[i] > 0 ?
+ bottom_data[i] + log(1. + exp(-bottom_data[i])) :
+ log(1. + exp(bottom_data[i]));
+ }
+}
+
+template <typename Dtype>
+Dtype BNLLLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down,
+ vector<Blob<Dtype>*>* bottom) {
+ if (propagate_down) {
+ const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+ const int count = (*bottom)[0]->count();
+ Dtype expval;
+ for (int i = 0; i < count; ++i) {
+ expval = exp(min(bottom_data[i], Dtype(kBNLL_THRESHOLD)));
+ bottom_diff[i] = top_diff[i] * expval / (expval + 1.);
+ }
+ }
+ return Dtype(0);
+}
+
+
+INSTANTIATE_CLASS(BNLLLayer);
+
+
+} // namespace caffe
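
Note: the forward pass moved into this new .cpp is the numerically stable softplus f(x) = log(1 + exp(x)), with the large-x branch rewritten as x + log(1 + exp(-x)) to avoid overflow, and the backward pass is the logistic sigmoid with its exp argument clamped at kBNLL_THRESHOLD. A minimal standalone sketch of the same math, plain C++ with illustrative names rather than the Caffe Blob API:

#include <algorithm>
#include <cmath>

// Stable softplus: log(1 + exp(x)) without overflowing for large positive x.
double bnll_forward(double x) {
  return x > 0 ? x + std::log1p(std::exp(-x))
               : std::log1p(std::exp(x));
}

// d/dx softplus(x) = sigmoid(x); the exp argument is clamped as in the layer.
double bnll_backward(double x, double top_diff, double threshold = 50.0) {
  double expval = std::exp(std::min(x, threshold));
  return top_diff * expval / (expval + 1.0);
}
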
diff --git a/src/caffe/layers/bnll_layer.cu b/src/caffe/layers/bnll_layer.cu
index f61cffa9..1edec33a 100644
--- a/src/caffe/layers/bnll_layer.cu
+++ b/src/caffe/layers/bnll_layer.cu
@@ -13,37 +13,6 @@ namespace caffe {
const float kBNLL_THRESHOLD = 50.;
template <typename Dtype>
-void BNLLLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->cpu_data();
- Dtype* top_data = (*top)[0]->mutable_cpu_data();
- const int count = bottom[0]->count();
- for (int i = 0; i < count; ++i) {
- top_data[i] = bottom_data[i] > 0 ?
- bottom_data[i] + log(1. + exp(-bottom_data[i])) :
- log(1. + exp(bottom_data[i]));
- }
-}
-
-template <typename Dtype>
-Dtype BNLLLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down,
- vector<Blob<Dtype>*>* bottom) {
- if (propagate_down) {
- const Dtype* bottom_data = (*bottom)[0]->cpu_data();
- const Dtype* top_diff = top[0]->cpu_diff();
- Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- const int count = (*bottom)[0]->count();
- Dtype expval;
- for (int i = 0; i < count; ++i) {
- expval = exp(min(bottom_data[i], Dtype(kBNLL_THRESHOLD)));
- bottom_diff[i] = top_diff[i] * expval / (expval + 1.);
- }
- }
- return Dtype(0);
-}
-
-template <typename Dtype>
__global__ void BNLLForward(const int n, const Dtype* in, Dtype* out) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 21256f90..64a652a8 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -107,36 +107,6 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
}
template <typename Dtype>
-void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->gpu_data();
- Dtype* top_data = (*top)[0]->mutable_gpu_data();
- Dtype* col_data = col_buffer_.mutable_gpu_data();
- const Dtype* weight = this->blobs_[0]->gpu_data();
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- for (int n = 0; n < NUM_; ++n) {
- // First, im2col
- im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
- WIDTH_, KSIZE_, PAD_, STRIDE_, col_data);
- // Second, innerproduct with groups
- for (int g = 0; g < GROUP_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
- (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
- (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
- }
- // third, add bias
- if (biasterm_) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
- N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
- reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
- (Dtype)1., top_data + (*top)[0]->offset(n));
- }
- }
-}
-
-template <typename Dtype>
Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
@@ -192,64 +162,6 @@ Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
return Dtype(0.);
}
-template <typename Dtype>
-Dtype ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
- const Dtype* top_diff = top[0]->gpu_diff();
- const Dtype* weight = this->blobs_[0]->gpu_data();
- Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
- const Dtype* bottom_data = (*bottom)[0]->gpu_data();
- Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
- Dtype* col_data = col_buffer_.mutable_gpu_data();
- Dtype* col_diff = col_buffer_.mutable_gpu_diff();
- // bias gradient if necessary
- Dtype* bias_diff = NULL;
-
- if (biasterm_) {
- bias_diff = this->blobs_[1]->mutable_gpu_diff();
- CUDA_CHECK(cudaMemset(bias_diff, 0,
- sizeof(Dtype) * this->blobs_[1]->count()));
- for (int n = 0; n < NUM_; ++n) {
- caffe_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
- 1., top_diff + top[0]->offset(n),
- reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
- 1., bias_diff);
- }
- }
-
- int weight_offset = M_ * K_;
- int col_offset = K_ * N_;
- int top_offset = M_ * N_;
- CUDA_CHECK(cudaMemset(weight_diff, 0,
- sizeof(Dtype) * this->blobs_[0]->count()));
- for (int n = 0; n < NUM_; ++n) {
- // since we saved memory in the forward pass by not storing all col data,
- // we will need to recompute them.
- im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
- WIDTH_, KSIZE_, PAD_, STRIDE_, col_data);
- // gradient w.r.t. weight. Note that we will accumulate diffs.
- for (int g = 0; g < GROUP_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
- (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
- col_data + col_offset * g, (Dtype)1.,
- weight_diff + weight_offset * g);
- }
- // gradient w.r.t. bottom data, if necessary
- if (propagate_down) {
- for (int g = 0; g < GROUP_; ++g) {
- caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
- (Dtype)1., weight + weight_offset * g,
- top_diff + top[0]->offset(n) + top_offset * g,
- (Dtype)0., col_diff + col_offset * g);
- }
- // col2im back to the data
- col2im_gpu(col_diff, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_,
- bottom_diff + (*bottom)[0]->offset(n));
- }
- }
- return Dtype(0.);
-}
-
INSTANTIATE_CLASS(ConvolutionLayer);
} // namespace caffe
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
new file mode 100644
index 00000000..a7f56faa
--- /dev/null
+++ b/src/caffe/layers/conv_layer.cu
@@ -0,0 +1,104 @@
+// Copyright 2013 Yangqing Jia
+
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/im2col.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = (*top)[0]->mutable_gpu_data();
+ Dtype* col_data = col_buffer_.mutable_gpu_data();
+ const Dtype* weight = this->blobs_[0]->gpu_data();
+ int weight_offset = M_ * K_;
+ int col_offset = K_ * N_;
+ int top_offset = M_ * N_;
+ for (int n = 0; n < NUM_; ++n) {
+ // First, im2col
+ im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, PAD_, STRIDE_, col_data);
+ // Second, innerproduct with groups
+ for (int g = 0; g < GROUP_; ++g) {
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+ (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+ (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
+ }
+ // third, add bias
+ if (biasterm_) {
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
+ N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
+ reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+ (Dtype)1., top_data + (*top)[0]->offset(n));
+ }
+ }
+}
+
+template <typename Dtype>
+Dtype ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ const Dtype* weight = this->blobs_[0]->gpu_data();
+ Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
+ const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+ Dtype* col_data = col_buffer_.mutable_gpu_data();
+ Dtype* col_diff = col_buffer_.mutable_gpu_diff();
+ // bias gradient if necessary
+ Dtype* bias_diff = NULL;
+
+ if (biasterm_) {
+ bias_diff = this->blobs_[1]->mutable_gpu_diff();
+ CUDA_CHECK(cudaMemset(bias_diff, 0,
+ sizeof(Dtype) * this->blobs_[1]->count()));
+ for (int n = 0; n < NUM_; ++n) {
+ caffe_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
+ 1., top_diff + top[0]->offset(n),
+ reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+ 1., bias_diff);
+ }
+ }
+
+ int weight_offset = M_ * K_;
+ int col_offset = K_ * N_;
+ int top_offset = M_ * N_;
+ CUDA_CHECK(cudaMemset(weight_diff, 0,
+ sizeof(Dtype) * this->blobs_[0]->count()));
+ for (int n = 0; n < NUM_; ++n) {
+ // since we saved memory in the forward pass by not storing all col data,
+ // we will need to recompute them.
+ im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, PAD_, STRIDE_, col_data);
+ // gradient w.r.t. weight. Note that we will accumulate diffs.
+ for (int g = 0; g < GROUP_; ++g) {
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+ (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
+ col_data + col_offset * g, (Dtype)1.,
+ weight_diff + weight_offset * g);
+ }
+ // gradient w.r.t. bottom data, if necessary
+ if (propagate_down) {
+ for (int g = 0; g < GROUP_; ++g) {
+ caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+ (Dtype)1., weight + weight_offset * g,
+ top_diff + top[0]->offset(n) + top_offset * g,
+ (Dtype)0., col_diff + col_offset * g);
+ }
+ // col2im back to the data
+ col2im_gpu(col_diff, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_,
+ bottom_diff + (*bottom)[0]->offset(n));
+ }
+ }
+ return Dtype(0.);
+}
+
+
+INSTANTIATE_CLASS(ConvolutionLayer);
+
+} // namespace caffe
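
Note: the GPU forward above follows the same im2col-plus-GEMM scheme as the CPU path: each image is unrolled into a column buffer of roughly K_ x N_ (K_ = channels * ksize * ksize / group, N_ = out_h * out_w), and each group's M_ x K_ weight matrix is multiplied against its slice of that buffer. A CPU-only sketch of that per-group GEMM step, assuming the column buffer is already filled; all names here are illustrative, not the Caffe API:

#include <vector>

// top(M x N) = weight(M x K) * col(K x N), row-major, one group of one image.
void conv_group_gemm(const std::vector<float>& weight,
                     const std::vector<float>& col,
                     std::vector<float>& top,
                     int M, int K, int N) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      float acc = 0.f;
      for (int k = 0; k < K; ++k) {
        acc += weight[m * K + k] * col[k * N + n];
      }
      top[m * N + n] = acc;  // one output channel at one spatial position
    }
  }
}
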
diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp
index f973a565..cc03cdbf 100644
--- a/src/caffe/layers/data_layer.cpp
+++ b/src/caffe/layers/data_layer.cpp
@@ -227,23 +227,6 @@ void DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
reinterpret_cast<void*>(this))) << "Pthread execution failed.";
}
-template <typename Dtype>
-void DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- // First, join the thread
- CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed.";
- // Copy the data
- CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
- prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
- cudaMemcpyHostToDevice));
- CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
- prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
- cudaMemcpyHostToDevice));
- // Start a new prefetch thread
- CHECK(!pthread_create(&thread_, NULL, DataLayerPrefetch<Dtype>,
- reinterpret_cast<void*>(this))) << "Pthread execution failed.";
-}
-
// The backward operations are dummy - they do not carry any computation.
template <typename Dtype>
Dtype DataLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
@@ -251,12 +234,6 @@ Dtype DataLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
return Dtype(0.);
}
-template <typename Dtype>
-Dtype DataLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
- return Dtype(0.);
-}
-
INSTANTIATE_CLASS(DataLayer);
} // namespace caffe
diff --git a/src/caffe/layers/data_layer.cu b/src/caffe/layers/data_layer.cu
new file mode 100644
index 00000000..946f30f3
--- /dev/null
+++ b/src/caffe/layers/data_layer.cu
@@ -0,0 +1,44 @@
+// Copyright 2013 Yangqing Jia
+
+#include <stdint.h>
+#include <leveldb/db.h>
+#include <pthread.h>
+
+#include <string>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/vision_layers.hpp"
+
+using std::string;
+
+namespace caffe {
+
+template <typename Dtype>
+void DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ // First, join the thread
+ CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed.";
+ // Copy the data
+ CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
+ prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
+ cudaMemcpyHostToDevice));
+ CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
+ prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
+ cudaMemcpyHostToDevice));
+ // Start a new prefetch thread
+ CHECK(!pthread_create(&thread_, NULL, DataLayerPrefetch<Dtype>,
+ reinterpret_cast<void*>(this))) << "Pthread execution failed.";
+}
+
+// The backward operations are dummy - they do not carry any computation.
+template <typename Dtype>
+Dtype DataLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+ return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(DataLayer);
+
+} // namespace caffe
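
Note: the data layer hides I/O latency by double buffering: Forward first joins the prefetch thread started on the previous call, copies the finished host buffer to the device, then immediately launches the next prefetch (a first prefetch is kicked off during setup so there is always a batch to join). A rough standalone sketch of that join-copy-relaunch pattern using std::thread instead of pthreads; names and the host "copy" are purely illustrative:

#include <thread>
#include <vector>

struct Prefetcher {
  std::vector<float> buffer;   // filled by the background thread
  std::thread worker;

  ~Prefetcher() { if (worker.joinable()) worker.join(); }

  void Launch() { worker = std::thread([this] { Fill(); }); }

  void Forward(std::vector<float>& dst) {
    worker.join();   // wait for the batch prepared during the previous step
    dst = buffer;    // stand-in for the cudaMemcpy host -> device above
    Launch();        // start preparing the next batch right away
  }

  void Fill() { /* read and decode the next batch into buffer */ }
};
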
diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp
new file mode 100644
index 00000000..4e1fbfae
--- /dev/null
+++ b/src/caffe/layers/dropout_layer.cpp
@@ -0,0 +1,63 @@
+// Copyright 2013 Yangqing Jia
+
+#include "caffe/common.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/syncedmem.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ NeuronLayer<Dtype>::SetUp(bottom, top);
+ // Set up the cache for random number generation
+ rand_vec_.reset(new SyncedMemory(bottom[0]->count() * sizeof(int)));
+ threshold_ = this->layer_param_.dropout_ratio();
+ DCHECK(threshold_ > 0.);
+ DCHECK(threshold_ < 1.);
+ scale_ = 1. / (1. - threshold_);
+ uint_thres_ = (unsigned int)(UINT_MAX * threshold_);
+}
+
+template <typename Dtype>
+void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = (*top)[0]->mutable_cpu_data();
+ int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
+ const int count = bottom[0]->count();
+ if (Caffe::phase() == Caffe::TRAIN) {
+ // Create random numbers
+ viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(),
+ count, mask, 1. - threshold_);
+ for (int i = 0; i < count; ++i) {
+ top_data[i] = bottom_data[i] * mask[i] * scale_;
+ }
+ } else {
+ memcpy(top_data, bottom_data, bottom[0]->count() * sizeof(Dtype));
+ }
+}
+
+template <typename Dtype>
+Dtype DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down,
+ vector<Blob<Dtype>*>* bottom) {
+ CHECK(Caffe::phase() == Caffe::TRAIN);
+ if (propagate_down) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+ const int* mask = reinterpret_cast<const int*>(rand_vec_->cpu_data());
+ const int count = (*bottom)[0]->count();
+ for (int i = 0; i < count; ++i) {
+ bottom_diff[i] = top_diff[i] * mask[i] * scale_;
+ }
+ }
+ return Dtype(0);
+}
+
+
+INSTANTIATE_CLASS(DropoutLayer);
+
+
+} // namespace caffe
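
Note: this is the "inverted" dropout variant: at training time each activation is kept with probability 1 - threshold_ and the survivors are scaled by 1 / (1 - threshold_), so the test-time pass is a plain copy. A standalone sketch of the train-time step, using std::mt19937 in place of the MKL VSL stream above; names are illustrative:

#include <cstddef>
#include <random>
#include <vector>

void dropout_train(const std::vector<float>& bottom, std::vector<float>& top,
                   std::vector<int>& mask, float threshold, std::mt19937& rng) {
  std::bernoulli_distribution keep(1.0 - threshold);
  const float scale = 1.0f / (1.0f - threshold);
  for (std::size_t i = 0; i < bottom.size(); ++i) {
    mask[i] = keep(rng) ? 1 : 0;           // 1 = keep, 0 = drop
    top[i] = bottom[i] * mask[i] * scale;  // rescale survivors now, so
  }                                        // inference needs no scaling
}
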
diff --git a/src/caffe/layers/dropout_layer.cu b/src/caffe/layers/dropout_layer.cu
index efba2952..2b6a68b7 100644
--- a/src/caffe/layers/dropout_layer.cu
+++ b/src/caffe/layers/dropout_layer.cu
@@ -13,54 +13,6 @@ using std::max;
namespace caffe {
-template <typename Dtype>
-void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- NeuronLayer<Dtype>::SetUp(bottom, top);
- // Set up the cache for random number generation
- rand_vec_.reset(new SyncedMemory(bottom[0]->count() * sizeof(int)));
- threshold_ = this->layer_param_.dropout_ratio();
- DCHECK(threshold_ > 0.);
- DCHECK(threshold_ < 1.);
- scale_ = 1. / (1. - threshold_);
- uint_thres_ = (unsigned int)(UINT_MAX * threshold_);
-}
-
-template <typename Dtype>
-void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->cpu_data();
- Dtype* top_data = (*top)[0]->mutable_cpu_data();
- int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
- const int count = bottom[0]->count();
- if (Caffe::phase() == Caffe::TRAIN) {
- // Create random numbers
- viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(),
- count, mask, 1. - threshold_);
- for (int i = 0; i < count; ++i) {
- top_data[i] = bottom_data[i] * mask[i] * scale_;
- }
- } else {
- memcpy(top_data, bottom_data, bottom[0]->count() * sizeof(Dtype));
- }
-}
-
-template <typename Dtype>
-Dtype DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down,
- vector<Blob<Dtype>*>* bottom) {
- CHECK(Caffe::phase() == Caffe::TRAIN);
- if (propagate_down) {
- const Dtype* top_diff = top[0]->cpu_diff();
- Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- const int* mask = reinterpret_cast<const int*>(rand_vec_->cpu_data());
- const int count = (*bottom)[0]->count();
- for (int i = 0; i < count; ++i) {
- bottom_diff[i] = top_diff[i] * mask[i] * scale_;
- }
- }
- return Dtype(0);
-}
template <typename Dtype>
__global__ void DropoutForward(const int n, const Dtype* in,
diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp
index bedf2963..9e17a820 100644
--- a/src/caffe/layers/flatten_layer.cpp
+++ b/src/caffe/layers/flatten_layer.cpp
@@ -30,14 +30,6 @@ void FlattenLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
}
template <typename Dtype>
-void FlattenLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->gpu_data();
- Dtype* top_data = (*top)[0]->mutable_gpu_data();
- caffe_gpu_copy(count_, bottom_data, top_data);
-}
-
-template <typename Dtype>
Dtype FlattenLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
@@ -46,16 +38,6 @@ Dtype FlattenLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
return Dtype(0.);
}
-
-template <typename Dtype>
-Dtype FlattenLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
- const Dtype* top_diff = top[0]->gpu_diff();
- Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
- caffe_gpu_copy(count_, top_diff, bottom_diff);
- return Dtype(0.);
-}
-
INSTANTIATE_CLASS(FlattenLayer);
} // namespace caffe
diff --git a/src/caffe/layers/flatten_layer.cu b/src/caffe/layers/flatten_layer.cu
new file mode 100644
index 00000000..571e22e2
--- /dev/null
+++ b/src/caffe/layers/flatten_layer.cu
@@ -0,0 +1,30 @@
+// Copyright 2013 Yangqing Jia
+
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void FlattenLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = (*top)[0]->mutable_gpu_data();
+ caffe_gpu_copy(count_, bottom_data, top_data);
+}
+
+template <typename Dtype>
+Dtype FlattenLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+ caffe_gpu_copy(count_, top_diff, bottom_diff);
+ return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(FlattenLayer);
+
+} // namespace caffe
diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp
index 11b7d29f..c31213e0 100644
--- a/src/caffe/layers/hdf5_data_layer.cpp
+++ b/src/caffe/layers/hdf5_data_layer.cpp
@@ -65,28 +65,6 @@ void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
}
}
-template <typename Dtype>
-void HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const int batchsize = this->layer_param_.batchsize();
- for (int i = 0; i < batchsize; ++i, ++current_row) {
- if (current_row == data_dims[0]) {
- current_row = 0;
- }
-
- CUDA_CHECK(cudaMemcpy(
- &(*top)[0]->mutable_gpu_data()[i * data_dims[1]],
- &(data.get()[current_row * data_dims[1]]),
- sizeof(Dtype) * data_dims[1],
- cudaMemcpyHostToDevice));
-
- CUDA_CHECK(cudaMemcpy(
- &(*top)[1]->mutable_gpu_data()[i * label_dims[1]],
- &(label.get()[current_row * label_dims[1]]),
- sizeof(Dtype) * label_dims[1],
- cudaMemcpyHostToDevice));
- }
-}
// The backward operations are dummy - they do not carry any computation.
template <typename Dtype>
@@ -95,12 +73,6 @@ Dtype HDF5DataLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
return Dtype(0.);
}
-template <typename Dtype>
-Dtype HDF5DataLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
- return Dtype(0.);
-}
-
INSTANTIATE_CLASS(HDF5DataLayer);
} // namespace caffe
diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu
new file mode 100644
index 00000000..1ecf149b
--- /dev/null
+++ b/src/caffe/layers/hdf5_data_layer.cu
@@ -0,0 +1,53 @@
+// Copyright Sergey Karayev 2014
+/*
+TODO:
+- only load parts of the file, in accordance with a prototxt param "max_mem"
+*/
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "hdf5.h"
+#include "hdf5_hl.h"
+
+#include "caffe/layer.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/vision_layers.hpp"
+
+using std::string;
+
+namespace caffe {
+
+template <typename Dtype>
+void HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const int batchsize = this->layer_param_.batchsize();
+ for (int i = 0; i < batchsize; ++i, ++current_row) {
+ if (current_row == data_dims[0]) {
+ current_row = 0;
+ }
+
+ CUDA_CHECK(cudaMemcpy(
+ &(*top)[0]->mutable_gpu_data()[i * data_dims[1]],
+ &(data.get()[current_row * data_dims[1]]),
+ sizeof(Dtype) * data_dims[1],
+ cudaMemcpyHostToDevice));
+
+ CUDA_CHECK(cudaMemcpy(
+ &(*top)[1]->mutable_gpu_data()[i * label_dims[1]],
+ &(label.get()[current_row * label_dims[1]]),
+ sizeof(Dtype) * label_dims[1],
+ cudaMemcpyHostToDevice));
+ }
+}
+
+template <typename Dtype>
+Dtype HDF5DataLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+ return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(HDF5DataLayer);
+
+} // namespace caffe
diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp
index a94209b3..e711713b 100644
--- a/src/caffe/layers/im2col_layer.cpp
+++ b/src/caffe/layers/im2col_layer.cpp
@@ -37,17 +37,6 @@ void Im2colLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
}
template <typename Dtype>
-void Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->gpu_data();
- Dtype* top_data = (*top)[0]->mutable_gpu_data();
- for (int n = 0; n < bottom[0]->num(); ++n) {
- im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
- WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n));
- }
-}
-
-template <typename Dtype>
Dtype Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
@@ -59,19 +48,6 @@ Dtype Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
return Dtype(0.);
}
-
-template <typename Dtype>
-Dtype Im2colLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
- const Dtype* top_diff = top[0]->gpu_diff();
- Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
- for (int n = 0; n < top[0]->num(); ++n) {
- col2im_gpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_,
- WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
- }
- return Dtype(0.);
-}
-
INSTANTIATE_CLASS(Im2colLayer);
} // namespace caffe
diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu
new file mode 100644
index 00000000..2d949b12
--- /dev/null
+++ b/src/caffe/layers/im2col_layer.cu
@@ -0,0 +1,38 @@
+// Copyright 2013 Yangqing Jia
+
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/util/im2col.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/common.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = (*top)[0]->mutable_gpu_data();
+ for (int n = 0; n < bottom[0]->num(); ++n) {
+ im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n));
+ }
+}
+
+template <typename Dtype>
+Dtype Im2colLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+ for (int n = 0; n < top[0]->num(); ++n) {
+ col2im_gpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
+ }
+ return Dtype(0.);
+}
+
+
+INSTANTIATE_CLASS(Im2colLayer);
+
+} // namespace caffe
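
Note: im2col rewrites each ksize x ksize receptive field of the input as one column so that convolution becomes a single matrix product, and col2im is the matching scatter-add used in the backward pass. A naive single-image CPU sketch of the unrolling, mirroring what the im2col helpers compute (out_h = (height + 2*pad - ksize) / stride + 1, likewise for the width):

// data_im: channels x height x width; data_col: (channels*ksize*ksize) x (out_h*out_w)
void im2col_naive(const float* data_im, int channels, int height, int width,
                  int ksize, int pad, int stride, float* data_col) {
  int height_col = (height + 2 * pad - ksize) / stride + 1;
  int width_col = (width + 2 * pad - ksize) / stride + 1;
  int channels_col = channels * ksize * ksize;
  for (int c = 0; c < channels_col; ++c) {
    int w_offset = c % ksize;             // column inside the kernel window
    int h_offset = (c / ksize) % ksize;   // row inside the kernel window
    int c_im = c / ksize / ksize;         // source input channel
    for (int h = 0; h < height_col; ++h) {
      for (int w = 0; w < width_col; ++w) {
        int h_pad = h * stride - pad + h_offset;
        int w_pad = w * stride - pad + w_offset;
        data_col[(c * height_col + h) * width_col + w] =
            (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
                ? data_im[(c_im * height + h_pad) * width + w_pad]
                : 0.f;  // zero padding outside the image
      }
    }
  }
}
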
diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp
index d770e23a..6987a787 100644
--- a/src/caffe/layers/inner_product_layer.cpp
+++ b/src/caffe/layers/inner_product_layer.cpp
@@ -2,7 +2,6 @@
#include <mkl.h>
-#include <cublas_v2.h>
#include <vector>
@@ -100,45 +99,6 @@ Dtype InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
return Dtype(0);
}
-template <typename Dtype>
-void InnerProductLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->gpu_data();
- Dtype* top_data = (*top)[0]->mutable_gpu_data();
- const Dtype* weight = this->blobs_[0]->gpu_data();
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1.,
- bottom_data, weight, (Dtype)0., top_data);
- if (biasterm_) {
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
- reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
- this->blobs_[1]->gpu_data(), (Dtype)1., top_data);
- }
-}
-
-template <typename Dtype>
-Dtype InnerProductLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down,
- vector<Blob<Dtype>*>* bottom) {
- const Dtype* top_diff = top[0]->gpu_diff();
- const Dtype* bottom_data = (*bottom)[0]->gpu_data();
- // Gradient with respect to weight
- caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
- top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff());
- if (biasterm_) {
- // Gradient with respect to bias
- caffe_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
- reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
- (Dtype)0., this->blobs_[1]->mutable_gpu_diff());
- }
- if (propagate_down) {
- // Gradient with respect to bottom data
- caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
- top_diff, this->blobs_[0]->gpu_data(), (Dtype)0.,
- (*bottom)[0]->mutable_gpu_diff());
- }
- return Dtype(0);
-}
-
INSTANTIATE_CLASS(InnerProductLayer);
} // namespace caffe
diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu
new file mode 100644
index 00000000..c7c3e2a9
--- /dev/null
+++ b/src/caffe/layers/inner_product_layer.cu
@@ -0,0 +1,59 @@
+// Copyright 2013 Yangqing Jia
+
+
+#include <mkl.h>
+#include <cublas_v2.h>
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void InnerProductLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = (*top)[0]->mutable_gpu_data();
+ const Dtype* weight = this->blobs_[0]->gpu_data();
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1.,
+ bottom_data, weight, (Dtype)0., top_data);
+ if (biasterm_) {
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
+ reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+ this->blobs_[1]->gpu_data(), (Dtype)1., top_data);
+ }
+}
+
+template <typename Dtype>
+Dtype InnerProductLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down,
+ vector<Blob<Dtype>*>* bottom) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+ // Gradient with respect to weight
+ caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
+ top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff());
+ if (biasterm_) {
+ // Gradient with respect to bias
+ caffe_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
+ reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
+ (Dtype)0., this->blobs_[1]->mutable_gpu_diff());
+ }
+ if (propagate_down) {
+ // Gradient with respect to bottom data
+ caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
+ top_diff, this->blobs_[0]->gpu_data(), (Dtype)0.,
+ (*bottom)[0]->mutable_gpu_diff());
+ }
+ return Dtype(0);
+}
+
+INSTANTIATE_CLASS(InnerProductLayer);
+
+} // namespace caffe
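
Note: the inner product layer is a batched fully connected transform, top = bottom * W^T (+ bias), with bottom of shape M_ x K_, weights stored N_ x K_, and top M_ x N_; the bias GEMM above adds the same N_-vector to every row via the all-ones bias multiplier. A plain-loop sketch of the forward math with illustrative names:

#include <vector>

// top(M x N) = bottom(M x K) * weight(N x K)^T + bias(N), all row-major.
void inner_product_forward(const std::vector<float>& bottom,
                           const std::vector<float>& weight,
                           const std::vector<float>& bias,
                           std::vector<float>& top, int M, int K, int N) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      float acc = bias[n];
      for (int k = 0; k < K; ++k) {
        acc += bottom[m * K + k] * weight[n * K + k];
      }
      top[m * N + n] = acc;
    }
  }
}
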
diff --git a/src/caffe/layers/loss_layer.cu b/src/caffe/layers/loss_layer.cpp
index 745bfa4b..1c4303d9 100644
--- a/src/caffe/layers/loss_layer.cu
+++ b/src/caffe/layers/loss_layer.cpp
@@ -42,7 +42,7 @@ Dtype MultinomialLogisticLossLayer<Dtype>::Backward_cpu(
Dtype loss = 0;
for (int i = 0; i < num; ++i) {
int label = static_cast<int>(bottom_label[i]);
- Dtype prob = max(bottom_data[i * dim + label], kLOG_THRESHOLD);
+ Dtype prob = max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD));
loss -= log(prob);
bottom_diff[i * dim + label] = - 1. / prob / num;
}
@@ -86,7 +86,7 @@ Dtype InfogainLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
for (int i = 0; i < num; ++i) {
int label = static_cast<int>(bottom_label[i]);
for (int j = 0; j < dim; ++j) {
- Dtype prob = max(bottom_data[i * dim + j], kLOG_THRESHOLD);
+ Dtype prob = max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD));
loss -= infogain_mat[label * dim + j] * log(prob);
bottom_diff[i * dim + j] = - infogain_mat[label * dim + j] / prob / num;
}
@@ -160,7 +160,7 @@ void AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
++accuracy;
}
Dtype prob = max(bottom_data[i * dim + static_cast<int>(bottom_label[i])],
- kLOG_THRESHOLD);
+ Dtype(kLOG_THRESHOLD));
logprob -= log(prob);
}
// LOG(INFO) << "Accuracy: " << accuracy;
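
Note: the only change in this renamed file is wrapping kLOG_THRESHOLD in Dtype(...). std::max is a template over a single type, so mixing a double-typed bottom_data element with the float constant fails to deduce T once the layer is instantiated for double; the cast makes both arguments the same type. The clamp itself just keeps log() away from zero probabilities, roughly:

#include <algorithm>
#include <cmath>

const float kLOG_THRESHOLD = 1e-20;  // assumed value, for illustration only

template <typename Dtype>
Dtype clamped_neg_log(Dtype prob) {
  // Dtype(kLOG_THRESHOLD) gives std::max two arguments of the same type,
  // so the call compiles for both float and double instantiations.
  return -std::log(std::max(prob, Dtype(kLOG_THRESHOLD)));
}
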
diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp
new file mode 100644
index 00000000..478ed31a
--- /dev/null
+++ b/src/caffe/layers/relu_layer.cpp
@@ -0,0 +1,42 @@
+// Copyright 2013 Yangqing Jia
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include <algorithm>
+
+using std::max;
+
+namespace caffe {
+
+template <typename Dtype>
+void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = (*top)[0]->mutable_cpu_data();
+ const int count = bottom[0]->count();
+ for (int i = 0; i < count; ++i) {
+ top_data[i] = max(bottom_data[i], Dtype(0));
+ }
+}
+
+template <typename Dtype>
+Dtype ReLULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down,
+ vector<Blob<Dtype>*>* bottom) {
+ if (propagate_down) {
+ const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+ const int count = (*bottom)[0]->count();
+ for (int i = 0; i < count; ++i) {
+ bottom_diff[i] = top_diff[i] * (bottom_data[i] > 0);
+ }
+ }
+ return Dtype(0);
+}
+
+
+INSTANTIATE_CLASS(ReLULayer);
+
+
+} // namespace caffe
diff --git a/src/caffe/layers/relu_layer.cu b/src/caffe/layers/relu_layer.cu
index ed1aab40..e2e58d96 100644
--- a/src/caffe/layers/relu_layer.cu
+++ b/src/caffe/layers/relu_layer.cu
@@ -11,33 +11,6 @@ using std::max;
namespace caffe {
template <typename Dtype>
-void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->cpu_data();
- Dtype* top_data = (*top)[0]->mutable_cpu_data();
- const int count = bottom[0]->count();
- for (int i = 0; i < count; ++i) {
- top_data[i] = max(bottom_data[i], Dtype(0));
- }
-}
-
-template <typename Dtype>
-Dtype ReLULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down,
- vector<Blob<Dtype>*>* bottom) {
- if (propagate_down) {
- const Dtype* bottom_data = (*bottom)[0]->cpu_data();
- const Dtype* top_diff = top[0]->cpu_diff();
- Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- const int count = (*bottom)[0]->count();
- for (int i = 0; i < count; ++i) {
- bottom_diff[i] = top_diff[i] * (bottom_data[i] > 0);
- }
- }
- return Dtype(0);
-}
-
-template <typename Dtype>
__global__ void ReLUForward(const int n, const Dtype* in, Dtype* out) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {
diff --git a/src/caffe/layers/sigmoid_layer.cpp b/src/caffe/layers/sigmoid_layer.cpp
new file mode 100644
index 00000000..112771f0
--- /dev/null
+++ b/src/caffe/layers/sigmoid_layer.cpp
@@ -0,0 +1,46 @@
+// Copyright 2014 Tobias Domhan
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include <algorithm>
+#include <cmath>
+
+namespace caffe {
+
+template <typename Dtype>
+inline Dtype sigmoid(Dtype x) {
+ return 1. / (1. + exp(-x));
+}
+
+template <typename Dtype>
+void SigmoidLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = (*top)[0]->mutable_cpu_data();
+ const int count = bottom[0]->count();
+ for (int i = 0; i < count; ++i) {
+ top_data[i] = sigmoid(bottom_data[i]);
+ }
+}
+
+template <typename Dtype>
+Dtype SigmoidLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down,
+ vector<Blob<Dtype>*>* bottom) {
+ if (propagate_down) {
+ const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+ const int count = (*bottom)[0]->count();
+ for (int i = 0; i < count; ++i) {
+ Dtype sigmoid_x = sigmoid(bottom_data[i]);
+ bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x);
+ }
+ }
+ return Dtype(0);
+}
+
+INSTANTIATE_CLASS(SigmoidLayer);
+
+
+} // namespace caffe
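
Note: the backward pass uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)), recomputing sigma from bottom_data rather than reading it back from the top blob. A tiny standalone version of the two passes:

#include <cmath>

double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

// Chain rule through the sigmoid for a single element.
double sigmoid_backward(double x, double top_diff) {
  double s = sigmoid(x);
  return top_diff * s * (1.0 - s);
}
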
diff --git a/src/caffe/layers/sigmoid_layer.cu b/src/caffe/layers/sigmoid_layer.cu
index e50260d7..785d1441 100644
--- a/src/caffe/layers/sigmoid_layer.cu
+++ b/src/caffe/layers/sigmoid_layer.cu
@@ -12,45 +12,10 @@ using std::max;
namespace caffe {
template <typename Dtype>
-inline Dtype sigmoid(Dtype x) {
- return 1. / (1. + exp(-x));
-}
-
-template <typename Dtype>
-void SigmoidLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->cpu_data();
- Dtype* top_data = (*top)[0]->mutable_cpu_data();
- const int count = bottom[0]->count();
- for (int i = 0; i < count; ++i) {
- top_data[i] = sigmoid(bottom_data[i]);
- }
-}
-
-template <typename Dtype>
-Dtype SigmoidLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down,
- vector<Blob<Dtype>*>* bottom) {
- if (propagate_down) {
- const Dtype* bottom_data = (*bottom)[0]->cpu_data();
- const Dtype* top_diff = top[0]->cpu_diff();
- Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- const int count = (*bottom)[0]->count();
- for (int i = 0; i < count; ++i) {
- Dtype sigmoid_x = sigmoid(bottom_data[i]);
- bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x);
- }
- }
- return Dtype(0);
-}
-
-
-template <typename Dtype>
__device__ inline Dtype sigmoid_gpu(Dtype x) {
return 1. / (1. + exp(-x));
}
-
template <typename Dtype>
__global__ void SigmoidForward(const int n, const Dtype* in, Dtype* out) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp
new file mode 100644
index 00000000..172094df
--- /dev/null
+++ b/src/caffe/layers/softmax_layer.cpp
@@ -0,0 +1,86 @@
+// Copyright 2013 Yangqing Jia
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+using std::max;
+
+namespace caffe {
+
+template <typename Dtype>
+void SoftmaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ CHECK_EQ(bottom.size(), 1) << "Softmax Layer takes a single blob as input.";
+ CHECK_EQ(top->size(), 1) << "Softmax Layer takes a single blob as output.";
+ (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
+ bottom[0]->height(), bottom[0]->width());
+ sum_multiplier_.Reshape(1, bottom[0]->channels(),
+ bottom[0]->height(), bottom[0]->width());
+ Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data();
+ for (int i = 0; i < sum_multiplier_.count(); ++i) {
+ multiplier_data[i] = 1.;
+ }
+ scale_.Reshape(bottom[0]->num(), 1, 1, 1);
+}
+
+template <typename Dtype>
+void SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = (*top)[0]->mutable_cpu_data();
+ Dtype* scale_data = scale_.mutable_cpu_data();
+ int num = bottom[0]->num();
+ int dim = bottom[0]->count() / bottom[0]->num();
+ memcpy(top_data, bottom_data, sizeof(Dtype) * bottom[0]->count());
+ // we need to subtract the max to avoid numerical issues, compute the exp,
+ // and then normalize.
+ for (int i = 0; i < num; ++i) {
+ scale_data[i] = bottom_data[i*dim];
+ for (int j = 0; j < dim; ++j) {
+ scale_data[i] = max(scale_data[i], bottom_data[i * dim + j]);
+ }
+ }
+ // subtraction
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+ scale_data, sum_multiplier_.cpu_data(), 1., top_data);
+ // Perform exponentiation
+ caffe_exp<Dtype>(num * dim, top_data, top_data);
+ // sum after exp
+ caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_data,
+ sum_multiplier_.cpu_data(), 0., scale_data);
+ // Do division
+ for (int i = 0; i < num; ++i) {
+ caffe_scal<Dtype>(dim, Dtype(1.) / scale_data[i], top_data + i * dim);
+ }
+}
+
+template <typename Dtype>
+Dtype SoftmaxLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down,
+ vector<Blob<Dtype>*>* bottom) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ const Dtype* top_data = top[0]->cpu_data();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+ Dtype* scale_data = scale_.mutable_cpu_data();
+ int num = top[0]->num();
+ int dim = top[0]->count() / top[0]->num();
+ memcpy(bottom_diff, top_diff, sizeof(Dtype) * top[0]->count());
+ // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff
+ for (int i = 0; i < num; ++i) {
+ scale_data[i] = caffe_cpu_dot<Dtype>(dim, top_diff + i * dim,
+ top_data + i * dim);
+ }
+ // subtraction
+ caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
+ scale_data, sum_multiplier_.cpu_data(), 1., bottom_diff);
+ // elementwise multiplication
+ caffe_mul<Dtype>(top[0]->count(), bottom_diff, top_data, bottom_diff);
+ return Dtype(0);
+}
+
+
+INSTANTIATE_CLASS(SoftmaxLayer);
+
+
+} // namespace caffe
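
Note: the forward pass above is the usual max-shifted softmax: per row, subtract the row maximum (done with a rank-1 GEMM against the all-ones sum_multiplier_), exponentiate, then divide by the row sum. A direct per-row sketch of the same computation:

#include <algorithm>
#include <cmath>
#include <vector>

// In-place numerically stable softmax over one row of length dim.
void softmax_row(std::vector<float>& row) {
  float max_val = *std::max_element(row.begin(), row.end());
  float sum = 0.f;
  for (float& v : row) {
    v = std::exp(v - max_val);  // shift by the max so exp cannot overflow
    sum += v;
  }
  for (float& v : row) v /= sum;
}
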
diff --git a/src/caffe/layers/softmax_layer.cu b/src/caffe/layers/softmax_layer.cu
index af73260e..fe2a89e5 100644
--- a/src/caffe/layers/softmax_layer.cu
+++ b/src/caffe/layers/softmax_layer.cu
@@ -15,53 +15,6 @@ using std::max;
namespace caffe {
template <typename Dtype>
-void SoftmaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- CHECK_EQ(bottom.size(), 1) << "Softmax Layer takes a single blob as input.";
- CHECK_EQ(top->size(), 1) << "Softmax Layer takes a single blob as output.";
- (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
- bottom[0]->height(), bottom[0]->width());
- sum_multiplier_.Reshape(1, bottom[0]->channels(),
- bottom[0]->height(), bottom[0]->width());
- Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data();
- for (int i = 0; i < sum_multiplier_.count(); ++i) {
- multiplier_data[i] = 1.;
- }
- scale_.Reshape(bottom[0]->num(), 1, 1, 1);
-}
-
-template <typename Dtype>
-void SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->cpu_data();
- Dtype* top_data = (*top)[0]->mutable_cpu_data();
- Dtype* scale_data = scale_.mutable_cpu_data();
- int num = bottom[0]->num();
- int dim = bottom[0]->count() / bottom[0]->num();
- memcpy(top_data, bottom_data, sizeof(Dtype) * bottom[0]->count());
- // we need to subtract the max to avoid numerical issues, compute the exp,
- // and then normalize.
- for (int i = 0; i < num; ++i) {
- scale_data[i] = bottom_data[i*dim];
- for (int j = 0; j < dim; ++j) {
- scale_data[i] = max(scale_data[i], bottom_data[i * dim + j]);
- }
- }
- // subtraction
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
- scale_data, sum_multiplier_.cpu_data(), 1., top_data);
- // Perform exponentiation
- caffe_exp<Dtype>(num * dim, top_data, top_data);
- // sum after exp
- caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_data,
- sum_multiplier_.cpu_data(), 0., scale_data);
- // Do division
- for (int i = 0; i < num; ++i) {
- caffe_scal<Dtype>(dim, Dtype(1.) / scale_data[i], top_data + i * dim);
- }
-}
-
-template <typename Dtype>
__global__ void kernel_get_max(const int num, const int dim,
const Dtype* data, Dtype* out) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
@@ -125,30 +78,6 @@ void SoftmaxLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
num, dim, scale_data, top_data);
}
-template <typename Dtype>
-Dtype SoftmaxLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down,
- vector<Blob<Dtype>*>* bottom) {
- const Dtype* top_diff = top[0]->cpu_diff();
- const Dtype* top_data = top[0]->cpu_data();
- Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- Dtype* scale_data = scale_.mutable_cpu_data();
- int num = top[0]->num();
- int dim = top[0]->count() / top[0]->num();
- memcpy(bottom_diff, top_diff, sizeof(Dtype) * top[0]->count());
- // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff
- for (int i = 0; i < num; ++i) {
- scale_data[i] = caffe_cpu_dot<Dtype>(dim, top_diff + i * dim,
- top_data + i * dim);
- }
- // subtraction
- caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
- scale_data, sum_multiplier_.cpu_data(), 1., bottom_diff);
- // elementwise multiplication
- caffe_mul<Dtype>(top[0]->count(), bottom_diff, top_data, bottom_diff);
- return Dtype(0);
-}
-
// TODO(Yangqing): implement the GPU version of softmax.
template <typename Dtype>
Dtype SoftmaxLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp
new file mode 100644
index 00000000..2ec73080
--- /dev/null
+++ b/src/caffe/layers/softmax_loss_layer.cpp
@@ -0,0 +1,59 @@
+// Copyright 2013 Yangqing Jia
+
+#include <algorithm>
+#include <cfloat>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+using std::max;
+
+namespace caffe {
+
+template <typename Dtype>
+void SoftmaxWithLossLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ CHECK_EQ(bottom.size(), 2) << "SoftmaxLoss Layer takes two blobs as input.";
+ CHECK_EQ(top->size(), 0) << "SoftmaxLoss Layer takes no blob as output.";
+ softmax_bottom_vec_.clear();
+ softmax_bottom_vec_.push_back(bottom[0]);
+ softmax_top_vec_.push_back(&prob_);
+ softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_);
+}
+
+template <typename Dtype>
+void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
+ const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
+ // The forward pass computes the softmax prob values.
+ softmax_bottom_vec_[0] = bottom[0];
+ softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_);
+}
+
+template <typename Dtype>
+Dtype SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down,
+ vector<Blob<Dtype>*>* bottom) {
+ // First, compute the diff
+ Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+ const Dtype* prob_data = prob_.cpu_data();
+ memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count());
+ const Dtype* label = (*bottom)[1]->cpu_data();
+ int num = prob_.num();
+ int dim = prob_.count() / num;
+ Dtype loss = 0;
+ for (int i = 0; i < num; ++i) {
+ bottom_diff[i * dim + static_cast<int>(label[i])] -= 1;
+ loss += -log(max(prob_data[i * dim + static_cast<int>(label[i])], Dtype(FLT_MIN)));
+ }
+ // Scale down gradient
+ caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff);
+ return loss / num;
+}
+
+
+INSTANTIATE_CLASS(SoftmaxWithLossLayer);
+
+
+} // namespace caffe
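
Note: because the softmax and the log-loss are fused, the backward pass is simply prob - one_hot(label), scaled by 1/num, and the returned loss is the average negative log probability of the true class. A compact per-example sketch with the same FLT_MIN clamp:

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <vector>

// One example: prob holds dim entries summing to 1, label is the true class.
// Fills diff with the gradient and returns the example's loss.
float softmax_loss_backward(const std::vector<float>& prob, int label,
                            std::vector<float>& diff) {
  diff.assign(prob.begin(), prob.end());  // gradient starts as the probabilities
  diff[label] -= 1.f;                     // ... minus the one-hot target
  return -std::log(std::max(prob[label], FLT_MIN));
}
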
diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu
index 3e265869..100393ca 100644
--- a/src/caffe/layers/softmax_loss_layer.cu
+++ b/src/caffe/layers/softmax_loss_layer.cu
@@ -13,25 +13,6 @@ using std::max;
namespace caffe {
template <typename Dtype>
-void SoftmaxWithLossLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- CHECK_EQ(bottom.size(), 2) << "SoftmaxLoss Layer takes two blobs as input.";
- CHECK_EQ(top->size(), 0) << "SoftmaxLoss Layer takes no blob as output.";
- softmax_bottom_vec_.clear();
- softmax_bottom_vec_.push_back(bottom[0]);
- softmax_top_vec_.push_back(&prob_);
- softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_);
-}
-
-template <typename Dtype>
-void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
- const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
- // The forward pass computes the softmax prob values.
- softmax_bottom_vec_[0] = bottom[0];
- softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_);
-}
-
-template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
// The forward pass computes the softmax prob values.
@@ -40,27 +21,6 @@ void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
}
template <typename Dtype>
-Dtype SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down,
- vector<Blob<Dtype>*>* bottom) {
- // First, compute the diff
- Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- const Dtype* prob_data = prob_.cpu_data();
- memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count());
- const Dtype* label = (*bottom)[1]->cpu_data();
- int num = prob_.num();
- int dim = prob_.count() / num;
- Dtype loss = 0;
- for (int i = 0; i < num; ++i) {
- bottom_diff[i * dim + static_cast<int>(label[i])] -= 1;
- loss += -log(max(prob_data[i * dim + static_cast<int>(label[i])], FLT_MIN));
- }
- // Scale down gradient
- caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff);
- return loss / num;
-}
-
-template <typename Dtype>
Dtype SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
// TODO(Yangqing): implement the GPU version of softmax.
diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp
index 56e95610..f9fc461a 100644
--- a/src/caffe/layers/split_layer.cpp
+++ b/src/caffe/layers/split_layer.cpp
@@ -41,19 +41,6 @@ void SplitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
}
template <typename Dtype>
-void SplitLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->gpu_data();
- for (int i = 0; i < top->size(); ++i) {
- if (i == 0 && (*top)[i] == bottom[0]) {
- continue;
- }
- Dtype* top_data = (*top)[i]->mutable_gpu_data();
- caffe_gpu_copy(count_, bottom_data, top_data);
- }
-}
-
-template <typename Dtype>
Dtype SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
if (propagate_down) {
@@ -75,27 +62,6 @@ Dtype SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
}
-template <typename Dtype>
-Dtype SplitLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
- if (propagate_down) {
- const Dtype* top_diff = top[0]->gpu_diff();
- Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
- // Initialize by copying first top blob diff to our diff, unless we're
- // doing in-place computation for the first blob, in which case the diff is
- // already initialized.
- if (top[0] != (*bottom)[0]) {
- caffe_gpu_copy(count_, top_diff, bottom_diff);
- }
- // Add remaining top blob diffs.
- for (int i = 1; i < top.size(); ++i) {
- top_diff = top[i]->gpu_diff();
- caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff);
- }
- }
- return Dtype(0.);
-}
-
INSTANTIATE_CLASS(SplitLayer);
} // namespace caffe
diff --git a/src/caffe/layers/split_layer.cu b/src/caffe/layers/split_layer.cu
new file mode 100644
index 00000000..5f25a460
--- /dev/null
+++ b/src/caffe/layers/split_layer.cu
@@ -0,0 +1,48 @@
+// Copyright 2014 Jeff Donahue
+
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void SplitLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ for (int i = 0; i < top->size(); ++i) {
+ if (i == 0 && (*top)[i] == bottom[0]) {
+ continue;
+ }
+ Dtype* top_data = (*top)[i]->mutable_gpu_data();
+ caffe_gpu_copy(count_, bottom_data, top_data);
+ }
+}
+
+template <typename Dtype>
+Dtype SplitLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+ if (propagate_down) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+ // Initialize by copying first top blob diff to our diff, unless we're
+ // doing in-place computation for the first blob, in which case the diff is
+ // already initialized.
+ if (top[0] != (*bottom)[0]) {
+ caffe_gpu_copy(count_, top_diff, bottom_diff);
+ }
+ // Add remaining top blob diffs.
+ for (int i = 1; i < top.size(); ++i) {
+ top_diff = top[i]->gpu_diff();
+ caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff);
+ }
+ }
+ return Dtype(0.);
+}
+
+
+INSTANTIATE_CLASS(SplitLayer);
+
+} // namespace caffe
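
Note: Split fans one blob out to several consumers, so its backward pass is just the sum of all top diffs (the initial copy is skipped when the layer runs in place on the first top blob). In plain form:

#include <cstddef>
#include <vector>

// bottom_diff = sum of the diffs coming back from every top blob.
void split_backward(const std::vector<std::vector<float> >& top_diffs,
                    std::vector<float>& bottom_diff) {
  bottom_diff = top_diffs[0];
  for (std::size_t t = 1; t < top_diffs.size(); ++t) {
    for (std::size_t i = 0; i < bottom_diff.size(); ++i) {
      bottom_diff[i] += top_diffs[t][i];  // axpy with alpha = 1
    }
  }
}
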
diff --git a/src/caffe/layers/tanh_layer.cpp b/src/caffe/layers/tanh_layer.cpp
new file mode 100644
index 00000000..d6f99560
--- /dev/null
+++ b/src/caffe/layers/tanh_layer.cpp
@@ -0,0 +1,48 @@
+// Copyright 2014 Aravindh Mahendran
+// TanH neuron activation function layer.
+// Adapted from ReLU layer code written by Yangqing Jia
+
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = (*top)[0]->mutable_cpu_data();
+ Dtype exp2x;
+ const int count = bottom[0]->count();
+ for (int i = 0; i < count; ++i) {
+ exp2x = exp(2*bottom_data[i]);
+ top_data[i] = (exp2x - Dtype(1))/(exp2x + Dtype(1));
+ }
+}
+
+template <typename Dtype>
+Dtype TanHLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down,
+ vector<Blob<Dtype>*>* bottom) {
+ if (propagate_down) {
+ const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+ const int count = (*bottom)[0]->count();
+ Dtype exp2x;
+ Dtype tanhx;
+ for (int i = 0; i < count; ++i) {
+ exp2x = exp(2*bottom_data[i]);
+ tanhx = (exp2x - Dtype(1))/(exp2x + Dtype(1));
+ bottom_diff[i] = top_diff[i] * (1 - tanhx*tanhx);
+ }
+ }
+ return Dtype(0);
+}
+
+INSTANTIATE_CLASS(TanHLayer);
+
+} // namespace caffe
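
Note: TanH is written out through exp(2x), and its gradient uses 1 - tanh(x)^2; std::tanh would give the same forward values (and avoids the inf/inf the exp form produces for very large inputs). A standalone version of both passes:

#include <cmath>

double tanh_forward(double x) {
  double e2x = std::exp(2.0 * x);
  return (e2x - 1.0) / (e2x + 1.0);   // algebraically equal to std::tanh(x)
}

double tanh_backward(double x, double top_diff) {
  double t = tanh_forward(x);
  return top_diff * (1.0 - t * t);    // d/dx tanh(x) = 1 - tanh(x)^2
}
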
diff --git a/src/caffe/layers/tanh_layer.cu b/src/caffe/layers/tanh_layer.cu
index a309a605..743e3147 100644
--- a/src/caffe/layers/tanh_layer.cu
+++ b/src/caffe/layers/tanh_layer.cu
@@ -11,39 +11,6 @@
namespace caffe {
template <typename Dtype>
-void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- const Dtype* bottom_data = bottom[0]->cpu_data();
- Dtype* top_data = (*top)[0]->mutable_cpu_data();
- Dtype exp2x;
- const int count = bottom[0]->count();
- for (int i = 0; i < count; ++i) {
- exp2x = exp(2*bottom_data[i]);
- top_data[i] = (exp2x - Dtype(1))/(exp2x + Dtype(1));
- }
-}
-
-template <typename Dtype>
-Dtype TanHLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
- const bool propagate_down,
- vector<Blob<Dtype>*>* bottom) {
- if (propagate_down) {
- const Dtype* bottom_data = (*bottom)[0]->cpu_data();
- const Dtype* top_diff = top[0]->cpu_diff();
- Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
- const int count = (*bottom)[0]->count();
- Dtype exp2x;
- Dtype tanhx;
- for (int i = 0; i < count; ++i) {
- exp2x = exp(2*bottom_data[i]);
- tanhx = (exp2x - Dtype(1))/(exp2x + Dtype(1));
- bottom_diff[i] = top_diff[i] * (1 - tanhx*tanhx);
- }
- }
- return Dtype(0);
-}
-
-template <typename Dtype>
__global__ void TanHForward(const int n, const Dtype* in, Dtype* out) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
if (index < n) {