// CUDA implementation of the Asin operator's gradient, plus CUDA registration
// of the Asin and AsinGradient operators.
//
// NOTE(review): the header names of the two standard includes and all template
// argument lists in this file were reconstructed from how the names are used
// below (int-valued dims, float kernel pointers, CUDAContext* argument).
// Confirm them against caffe2/operators/asin_op.h.
#include "caffe2/operators/asin_op.h"

#include <functional> // std::multiplies
#include <numeric> // std::accumulate

#include "caffe2/core/context_gpu.h"

namespace caffe2 {

namespace {

// Computes the element-wise gradient of asin:
//   dX[i] = dY[i] / sqrt(1 - X[i]^2)
//
// N  - number of elements to process.
// dY - upstream gradient, read-only.
// X  - forward-pass input, read-only.
// dX - output gradient, written for every index visited by the loop.
//
// Iteration over indices is delegated to CUDA_1D_KERNEL_LOOP (defined outside
// this file); presumably it covers every i in [0, N) for any launch
// configuration -- verify against its definition.
// No clamping is applied: for |X[i]| >= 1 the rsqrtf argument is <= 0 and the
// result is whatever rsqrtf yields for that input (inf / NaN).
__global__ void AsinGradientCUDAKernel(
    const int N,
    const float* dY,
    const float* X,
    float* dX) {
  CUDA_1D_KERNEL_LOOP(i, N) {
#if __CUDA_ARCH__ >= 350
    // __ldg is only available on compute capability 3.5 and newer.
    dX[i] = __ldg(dY + i) * rsqrtf(1.0f - __ldg(X + i) * __ldg(X + i));
#else
    dX[i] = dY[i] * rsqrtf(1.0f - X[i] * X[i]);
#endif
  }
}

} // namespace

// Launches AsinGradientCUDAKernel over all elements described by X_dims.
//
// X_dims  - dimensions of X; their product is the element count.
// dY_dims - unused.
// X, dY   - device input buffers (forward input and upstream gradient).
// dX      - device output buffer for the computed gradient.
// context - supplies the CUDA stream the kernel is enqueued on.
//
// Returns true when there is nothing to do or the launch was accepted, and
// false when the CUDA runtime reports an error after the launch.
template <>
template <typename T>
bool AsinGradientFunctor<CUDAContext>::Forward(
    const std::vector<int>& X_dims,
    const std::vector<int>& /* dY_dims */,
    const T* X,
    const T* dY,
    T* dX,
    CUDAContext* context) const {
  const int size = std::accumulate(
      X_dims.cbegin(), X_dims.cend(), 1, std::multiplies<int>());
  // Empty tensor: skip the launch so the grid size is never derived from 0.
  if (size == 0) {
    return true;
  }
  AsinGradientCUDAKernel<<<
      CAFFE_GET_BLOCKS(size),
      CAFFE_CUDA_NUM_THREADS,
      0,
      context->cuda_stream()>>>(size, dY, X, dX);
  // A kernel launch does not return a status itself; query the runtime so a
  // rejected launch is reported to the caller instead of silently succeeding.
  return cudaGetLastError() == cudaSuccess;
}

REGISTER_CUDA_OPERATOR(
    Asin,
    UnaryElementwiseOp<
        TensorTypes<float>,
        CUDAContext,
        AsinFunctor<CUDAContext>>);
REGISTER_CUDA_OPERATOR(
    AsinGradient,
    BinaryElementwiseOp<
        TensorTypes<float>,
        CUDAContext,
        AsinGradientFunctor<CUDAContext>>);

} // namespace caffe2