| | | |
| --- | --- | --- |
| author | Aravindh Mahendran <aravindh@aravindh-VirtualBox.(none)> | 2014-02-16 10:43:34 -0500 |
| committer | Evan Shelhamer <shelhamer@imaginarynumber.net> | 2014-02-26 13:03:39 -0800 |
| commit | 8448708ba37c920ae6b126a3b6f0f4353e848b01 (patch) | |
| tree | c817e9a0b5d7fd6dd288e04b790c5ca61893f520 | |
| parent | f0b76ea244a07dd258671015d0e944da5deac7c6 (diff) | |
| download | caffeonacl-8448708ba37c920ae6b126a3b6f0f4353e848b01.tar.gz, caffeonacl-8448708ba37c920ae6b126a3b6f0f4353e848b01.tar.bz2, caffeonacl-8448708ba37c920ae6b126a3b6f0f4353e848b01.zip | |
Added tanh activation function layer.
| | | |
| --- | --- | --- |
| -rw-r--r-- | include/caffe/vision_layers.hpp | 17 |
| -rw-r--r-- | src/caffe/layer_factory.cpp | 2 |
| -rw-r--r-- | src/caffe/layers/tanh_layer.cu | 97 |

3 files changed, 116 insertions(+), 0 deletions(-)
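For reference (this note is not part of the original commit message): the new layer applies the hyperbolic tangent elementwise, and the code in the diff below evaluates it through the exponential identity

$$
\tanh(x) = \frac{e^{2x} - 1}{e^{2x} + 1},
\qquad
\frac{d}{dx}\tanh(x) = 1 - \tanh^2(x),
$$

so the forward pass computes `exp(2*x)` once per element, and the backward pass scales the incoming gradient `top_diff[i]` by `1 - tanhx*tanhx`.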
```diff
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 82e52cd5..47909a21 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -44,6 +44,23 @@ class ReLULayer : public NeuronLayer<Dtype> {
       const bool propagate_down, vector<Blob<Dtype>*>* bottom);
 };
 
+template <typename Dtype>
+class TanHLayer : public NeuronLayer<Dtype> {
+ public:
+  explicit TanHLayer(const LayerParameter& param)
+      : NeuronLayer<Dtype>(param) {}
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+};
 
 template <typename Dtype>
 class SigmoidLayer : public NeuronLayer<Dtype> {
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index b62ba383..cb65e8f7 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -47,6 +47,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new PoolingLayer<Dtype>(param);
   } else if (type == "relu") {
     return new ReLULayer<Dtype>(param);
+  } else if (type == "tanh") {
+    return new TanHLayer<Dtype>(param);
   } else if (type == "sigmoid") {
     return new SigmoidLayer<Dtype>(param);
   } else if (type == "softmax") {
diff --git a/src/caffe/layers/tanh_layer.cu b/src/caffe/layers/tanh_layer.cu
new file mode 100644
index 00000000..22e0831a
--- /dev/null
+++ b/src/caffe/layers/tanh_layer.cu
@@ -0,0 +1,97 @@
+// Copyright 2014 Aravindh Mahendran
+// TanH neuron activation function layer. Adapted from ReLU layer code written by Yangqing Jia
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include <algorithm>
+
+namespace caffe {
+
+template <typename Dtype>
+void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = (*top)[0]->mutable_cpu_data();
+  Dtype exp2x;
+  const int count = bottom[0]->count();
+  for (int i = 0; i < count; ++i) {
+    exp2x = exp(2*bottom_data[i]);
+    top_data[i] = (exp2x - Dtype(1))/(exp2x + Dtype(1));
+  }
+}
+
+template <typename Dtype>
+Dtype TanHLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down) {
+    const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+    const Dtype* top_diff = top[0]->cpu_diff();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+    const int count = (*bottom)[0]->count();
+    Dtype exp2x;
+    Dtype tanhx;
+    for (int i = 0; i < count; ++i) {
+      exp2x = exp(2*bottom_data[i]);
+      tanhx = (exp2x - Dtype(1))/(exp2x + Dtype(1));
+      bottom_diff[i] = top_diff[i] * (1 - tanhx*tanhx);
+    }
+  }
+  return Dtype(0);
+}
+
+template <typename Dtype>
+__global__ void TanHForward(const int n, const Dtype* in, Dtype* out) {
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n) {
+    Dtype exp2x = exp(2*in[index]);
+    out[index] = (exp2x - Dtype(1))/(exp2x + Dtype(1));
+  }
+}
+
+template <typename Dtype>
+void TanHLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = (*top)[0]->mutable_gpu_data();
+  const int count = bottom[0]->count();
+  TanHForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+      count, bottom_data, top_data);
+  CUDA_POST_KERNEL_CHECK;
+  // << " count: " << count << " bottom_data: "
+  //     << (unsigned long)bottom_data << " top_data: " << (unsigned long)top_data
+  //     << " blocks: " << CAFFE_GET_BLOCKS(count)
+  //     << " threads: " << CAFFE_CUDA_NUM_THREADS;
+}
+
+template <typename Dtype>
+__global__ void TanHBackward(const int n, const Dtype* in_diff,
+    const Dtype* in_data, Dtype* out_diff) {
+  int index = threadIdx.x + blockIdx.x * blockDim.x;
+  if (index < n) {
+    Dtype exp2x = exp(2*in_data[index]);
+    Dtype tanhx = (exp2x - Dtype(1))/(exp2x + Dtype(1));
+    out_diff[index] = in_diff[index] * (1 - tanhx*tanhx);
+  }
+}
+
+template <typename Dtype>
+Dtype TanHLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down) {
+    const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+    const Dtype* top_diff = top[0]->gpu_diff();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+    const int count = (*bottom)[0]->count();
+    TanHBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+        count, top_diff, bottom_data, bottom_diff);
+    CUDA_POST_KERNEL_CHECK;
+  }
+  return Dtype(0);
+}
+
+INSTANTIATE_CLASS(TanHLayer);
+
+
+}  // namespace caffe
```
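As a quick illustration of how this layer is reached, here is a minimal sketch, assuming the Caffe API of this era: a `LayerParameter` whose `type` string is the `"tanh"` registered in `layer_factory.cpp`, the public `SetUp`/`Forward` wrappers on the `Layer` base class, and the header paths as they stood in this tree. None of this appears in the commit itself.

```cpp
// Minimal sketch (not part of the commit): running one CPU forward
// pass through the new TanH layer. The includes and the SetUp/Forward
// signatures are assumptions about this era of the Caffe API.
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/vision_layers.hpp"

using namespace caffe;

int main() {
  Caffe::set_mode(Caffe::CPU);  // exercise Forward_cpu rather than the kernel

  // "tanh" is the string the factory now dispatches on; constructing
  // TanHLayer directly here, GetLayer(param) would return the same class.
  LayerParameter param;
  param.set_name("tanh1");
  param.set_type("tanh");
  TanHLayer<float> layer(param);

  // A neuron layer maps one bottom blob to one top blob of the same shape.
  Blob<float> bottom(1, 1, 1, 3);
  Blob<float> top;
  float* in = bottom.mutable_cpu_data();
  in[0] = -1.0f; in[1] = 0.0f; in[2] = 1.0f;

  std::vector<Blob<float>*> bottom_vec(1, &bottom);
  std::vector<Blob<float>*> top_vec(1, &top);

  layer.SetUp(bottom_vec, &top_vec);
  layer.Forward(bottom_vec, &top_vec);
  // top now holds roughly {-0.7616, 0.0, 0.7616}.
  return 0;
}
```

One design note on the implementation: evaluating tanh through `exp(2*x)` overflows to infinity for large positive inputs, turning the quotient into inf/inf = NaN; delegating to the standard `tanh()` function avoids that edge case.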