diff options
Diffstat (limited to 'src/caffe/util/math_functions.cpp')
-rw-r--r-- | src/caffe/util/math_functions.cpp | 135 |
1 files changed, 4 insertions, 131 deletions
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index b989ca2a..36d8877d 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -2,7 +2,6 @@ #include <boost/math/special_functions/next.hpp> #include <boost/random.hpp> -#include <cublas_v2.h> #include <limits> @@ -35,38 +34,6 @@ void caffe_cpu_gemm<double>(const CBLAS_TRANSPOSE TransA, } template <> -void caffe_gpu_gemm<float>(const CBLAS_TRANSPOSE TransA, - const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, - const float alpha, const float* A, const float* B, const float beta, - float* C) { - // Note that cublas follows fortran order. - int lda = (TransA == CblasNoTrans) ? K : M; - int ldb = (TransB == CblasNoTrans) ? N : K; - cublasOperation_t cuTransA = - (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; - cublasOperation_t cuTransB = - (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; - CUBLAS_CHECK(cublasSgemm(Caffe::cublas_handle(), cuTransB, cuTransA, - N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); -} - -template <> -void caffe_gpu_gemm<double>(const CBLAS_TRANSPOSE TransA, - const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, - const double alpha, const double* A, const double* B, const double beta, - double* C) { - // Note that cublas follows fortran order. - int lda = (TransA == CblasNoTrans) ? K : M; - int ldb = (TransB == CblasNoTrans) ? N : K; - cublasOperation_t cuTransA = - (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; - cublasOperation_t cuTransB = - (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; - CUBLAS_CHECK(cublasDgemm(Caffe::cublas_handle(), cuTransB, cuTransA, - N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); -} - -template <> void caffe_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float* A, const float* x, const float beta, float* y) { @@ -81,26 +48,6 @@ void caffe_cpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M, } template <> -void caffe_gpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M, - const int N, const float alpha, const float* A, const float* x, - const float beta, float* y) { - cublasOperation_t cuTransA = - (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N; - CUBLAS_CHECK(cublasSgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha, - A, N, x, 1, &beta, y, 1)); -} - -template <> -void caffe_gpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M, - const int N, const double alpha, const double* A, const double* x, - const double beta, double* y) { - cublasOperation_t cuTransA = - (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N; - CUBLAS_CHECK(cublasDgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha, - A, N, x, 1, &beta, y, 1)); -} - -template <> void caffe_axpy<float>(const int N, const float alpha, const float* X, float* Y) { cblas_saxpy(N, alpha, X, 1, Y, 1); } @@ -108,18 +55,6 @@ template <> void caffe_axpy<double>(const int N, const double alpha, const double* X, double* Y) { cblas_daxpy(N, alpha, X, 1, Y, 1); } -template <> -void caffe_gpu_axpy<float>(const int N, const float alpha, const float* X, - float* Y) { - CUBLAS_CHECK(cublasSaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1)); -} - -template <> -void caffe_gpu_axpy<double>(const int N, const double alpha, const double* X, - double* Y) { - CUBLAS_CHECK(cublasDaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1)); -} - template <typename Dtype> void caffe_set(const int N, const Dtype alpha, Dtype* Y) { if (alpha == 0) { @@ -153,7 +88,11 @@ template <typename Dtype> void caffe_copy(const int N, const Dtype* X, Dtype* Y) { if (X != Y) { if (Caffe::mode() == Caffe::GPU) { +#ifndef CPU_ONLY CUDA_CHECK(cudaMemcpy(Y, X, sizeof(Dtype) * N, cudaMemcpyDefault)); +#else + NO_GPU; +#endif } else { memcpy(Y, X, sizeof(Dtype) * N); } @@ -166,12 +105,6 @@ template void caffe_copy<unsigned int>(const int N, const unsigned int* X, template void caffe_copy<float>(const int N, const float* X, float* Y); template void caffe_copy<double>(const int N, const double* X, double* Y); -void caffe_gpu_memcpy(const size_t N, const void* X, void* Y) { - if (X != Y) { - CUDA_CHECK(cudaMemcpy(Y, X, N, cudaMemcpyDefault)); - } -} - template <> void caffe_scal<float>(const int N, const float alpha, float *X) { cblas_sscal(N, alpha, X, 1); @@ -183,30 +116,6 @@ void caffe_scal<double>(const int N, const double alpha, double *X) { } template <> -void caffe_gpu_scal<float>(const int N, const float alpha, float *X) { - CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1)); -} - -template <> -void caffe_gpu_scal<double>(const int N, const double alpha, double *X) { - CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1)); -} - -template <> -void caffe_gpu_axpby<float>(const int N, const float alpha, const float* X, - const float beta, float* Y) { - caffe_gpu_scal<float>(N, beta, Y); - caffe_gpu_axpy<float>(N, alpha, X, Y); -} - -template <> -void caffe_gpu_axpby<double>(const int N, const double alpha, const double* X, - const double beta, double* Y) { - caffe_gpu_scal<double>(N, beta, Y); - caffe_gpu_axpy<double>(N, alpha, X, Y); -} - -template <> void caffe_cpu_axpby<float>(const int N, const float alpha, const float* X, const float beta, float* Y) { cblas_saxpby(N, alpha, X, 1, beta, Y, 1); @@ -408,18 +317,6 @@ double caffe_cpu_dot<double>(const int n, const double* x, const double* y) { } template <> -void caffe_gpu_dot<float>(const int n, const float* x, const float* y, - float* out) { - CUBLAS_CHECK(cublasSdot(Caffe::cublas_handle(), n, x, 1, y, 1, out)); -} - -template <> -void caffe_gpu_dot<double>(const int n, const double* x, const double* y, - double * out) { - CUBLAS_CHECK(cublasDdot(Caffe::cublas_handle(), n, x, 1, y, 1, out)); -} - -template <> int caffe_cpu_hamming_distance<float>(const int n, const float* x, const float* y) { int dist = 0; @@ -451,16 +348,6 @@ double caffe_cpu_asum<double>(const int n, const double* x) { return cblas_dasum(n, x, 1); } -template <> -void caffe_gpu_asum<float>(const int n, const float* x, float* y) { - CUBLAS_CHECK(cublasSasum(Caffe::cublas_handle(), n, x, 1, y)); -} - -template <> -void caffe_gpu_asum<double>(const int n, const double* x, double* y) { - CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y)); -} - INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign); INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sgnbit); INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs); @@ -479,18 +366,4 @@ void caffe_cpu_scale<double>(const int n, const double alpha, const double *x, cblas_dscal(n, alpha, y, 1); } -template <> -void caffe_gpu_scale<float>(const int n, const float alpha, const float *x, - float* y) { - CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), n, x, 1, y, 1)); - CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), n, &alpha, y, 1)); -} - -template <> -void caffe_gpu_scale<double>(const int n, const double alpha, const double *x, - double* y) { - CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), n, x, 1, y, 1)); - CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1)); -} - } // namespace caffe |