From e98023af4a570e3105486b661e4c4d1855c0dd79 Mon Sep 17 00:00:00 2001
From: Patrick Follmann
Date: Thu, 29 Dec 2016 14:37:21 +0100
Subject: Add GPU sqrt functions

---
 include/caffe/util/math_functions.hpp |  3 +++
 src/caffe/util/math_functions.cu      | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index 37abce5e..60a8404a 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -214,6 +214,9 @@ void caffe_gpu_log(const int n, const Dtype* a, Dtype* y);
 template <typename Dtype>
 void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
 
+template <typename Dtype>
+void caffe_gpu_sqrt(const int n, const Dtype* a, Dtype* y);
+
 // caffe_gpu_rng_uniform with two arguments generates integers in the range
 // [0, UINT_MAX].
 void caffe_gpu_rng_uniform(const int n, unsigned int* r);
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu
index 6d001026..314e6ba0 100644
--- a/src/caffe/util/math_functions.cu
+++ b/src/caffe/util/math_functions.cu
@@ -387,6 +387,27 @@ void caffe_gpu_powx<double>(const int N, const double* a,
       N, a, alpha, y);
 }
 
+template <typename Dtype>
+__global__ void sqrt_kernel(const int n, const Dtype* a, Dtype* y) {
+  CUDA_KERNEL_LOOP(index, n) {
+    y[index] = sqrt(a[index]);
+  }
+}
+
+template <>
+void caffe_gpu_sqrt<float>(const int N, const float* a, float* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  sqrt_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+      N, a, y);
+}
+
+template <>
+void caffe_gpu_sqrt<double>(const int N, const double* a, double* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  sqrt_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+      N, a, y);
+}
+
 DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index])
     - (x[index] < Dtype(0)));
 DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index]));
--
cgit v1.2.3


From e93fcd267582888f960ca48d6e0c2e719d4ea09b Mon Sep 17 00:00:00 2001
From: Patrick Follmann
Date: Thu, 29 Dec 2016 14:46:16 +0100
Subject: GPU BatchNormLayer: replace powx with mul and sqrt

---
 src/caffe/layers/batch_norm_layer.cu | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/caffe/layers/batch_norm_layer.cu b/src/caffe/layers/batch_norm_layer.cu
index c21713c8..a35e778e 100644
--- a/src/caffe/layers/batch_norm_layer.cu
+++ b/src/caffe/layers/batch_norm_layer.cu
@@ -48,14 +48,14 @@ void BatchNormLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 
   if (!use_global_stats_) {
     // compute variance using var(X) = E((X-EX)^2)
-    caffe_gpu_powx(top[0]->count(), top_data, Dtype(2),
+    caffe_gpu_mul(top[0]->count(), top[0]->gpu_data(), top[0]->gpu_data(),
         temp_.mutable_gpu_data());  // (X-EX)^2
     caffe_gpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
         1. / (num * spatial_dim), temp_.gpu_data(),
         spatial_sum_multiplier_.gpu_data(), 0.,
         num_by_chans_.mutable_gpu_data());
-    caffe_gpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
-        num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0.,
+    caffe_gpu_gemv<Dtype>(CblasTrans, num, channels_, Dtype(1.),
+        num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), Dtype(0.),
         variance_.mutable_gpu_data());  // E((X_EX)^2)
 
     // compute and save moving average
@@ -72,7 +72,7 @@ void BatchNormLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 
   // normalize variance
   caffe_gpu_add_scalar(variance_.count(), eps_, variance_.mutable_gpu_data());
-  caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
+  caffe_gpu_sqrt(variance_.count(), variance_.gpu_data(),
       variance_.mutable_gpu_data());
 
   // replicate variance to input size
--
cgit v1.2.3


From ab3398832964c1ff1bf6b78501e4e43a11f282a1 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 13 Apr 2017 13:25:16 -0700
Subject: Add CPU sqrt functions

---
 include/caffe/util/math_functions.hpp |  3 +++
 src/caffe/util/math_functions.cpp     | 10 ++++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index 60a8404a..e549120a 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -52,6 +52,9 @@ void caffe_scal(const int N, const Dtype alpha, Dtype *X);
 template <typename Dtype>
 void caffe_sqr(const int N, const Dtype* a, Dtype* y);
 
+template <typename Dtype>
+void caffe_sqrt(const int N, const Dtype* a, Dtype* y);
+
 template <typename Dtype>
 void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
 
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index 71c02274..59625bc0 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -196,6 +196,16 @@ void caffe_sqr<double>(const int n, const double* a, double* y) {
   vdSqr(n, a, y);
 }
 
+template <>
+void caffe_sqrt<float>(const int n, const float* a, float* y) {
+  vsSqrt(n, a, y);
+}
+
+template <>
+void caffe_sqrt<double>(const int n, const double* a, double* y) {
+  vdSqrt(n, a, y);
+}
+
 template <>
 void caffe_exp<float>(const int n, const float* a, float* y) {
   vsExp(n, a, y);
--
cgit v1.2.3


From 1c15d94f7da736945450e6ed321077f3045445b1 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 13 Apr 2017 13:26:16 -0700
Subject: CPU BatchNormLayer: replace powx with sqr and sqrt

---
 src/caffe/layers/batch_norm_layer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp
index 0a08ed4c..c6a1d5b1 100644
--- a/src/caffe/layers/batch_norm_layer.cpp
+++ b/src/caffe/layers/batch_norm_layer.cpp
@@ -124,8 +124,8 @@ void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 
   if (!use_global_stats_) {
     // compute variance using var(X) = E((X-EX)^2)
-    caffe_powx(top[0]->count(), top_data, Dtype(2),
-        temp_.mutable_cpu_data());  // (X-EX)^2
+    caffe_sqr<Dtype>(top[0]->count(), top_data,
+                     temp_.mutable_cpu_data());  // (X-EX)^2
     caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
         1. / (num * spatial_dim), temp_.cpu_data(),
         spatial_sum_multiplier_.cpu_data(), 0.,
@@ -148,7 +148,7 @@ void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 
   // normalize variance
   caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());
-  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
+  caffe_sqrt(variance_.count(), variance_.cpu_data(),
       variance_.mutable_cpu_data());
 
   // replicate variance to input size
--
cgit v1.2.3
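
Not part of the patch series above: the new caffe_sqrt / caffe_gpu_sqrt routines compute an element-wise square root, y[i] = sqrt(a[i]). In BatchNormLayer they replace caffe_powx(..., Dtype(0.5)) applied to the variance buffer, which is non-negative after eps_ is added, so the result is unchanged. The sketch below is a minimal, standalone illustration of that contract; elementwise_sqrt is an invented name standing in for what vsSqrt/vdSqrt (MKL) or the CUDA sqrt_kernel provide inside Caffe.

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for caffe_sqrt(n, a, y): element-wise square root of
// n values read from a, written to y (the same contract the patches add).
template <typename Dtype>
void elementwise_sqrt(const int n, const Dtype* a, Dtype* y) {
  for (int i = 0; i < n; ++i) {
    y[i] = std::sqrt(a[i]);
  }
}

int main() {
  // Variance + eps is non-negative, so sqrt(x) matches the powx(x, 0.5) it replaces.
  std::vector<double> var = {1e-5, 0.25, 1.0, 4.0};
  std::vector<double> stddev(var.size());
  elementwise_sqrt(static_cast<int>(var.size()), var.data(), stddev.data());
  for (std::size_t i = 0; i < var.size(); ++i) {
    std::printf("sqrt(%g) = %g  (powx(0.5): %g)\n",
                var[i], stddev[i], std::pow(var[i], 0.5));
  }
  return 0;
}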