#include "caffe2/operators/atan_op.h"

#include <functional>
#include <numeric>

#include "caffe2/core/context_gpu.h"

namespace caffe2 {

namespace {

// Elementwise backward pass for atan: d/dx atan(x) = 1 / (1 + x^2),
// so dX[i] = dY[i] / (1 + X[i]^2).
// Launched 1-D; CUDA_1D_KERNEL_LOOP is a grid-stride loop, so any
// grid/block configuration covers all N elements.
template <typename T>
__global__ void
AtanGradientCUDAKernel(const int N, const T* dY, const T* X, T* dX) {
  CUDA_1D_KERNEL_LOOP(i, N) {
#if __CUDA_ARCH__ >= 350
    // __ldg routes the read-only inputs through the read-only data
    // cache (requires SM35+).
    dX[i] = __ldg(dY + i) / (T(1) + __ldg(X + i) * __ldg(X + i));
#else
    dX[i] = dY[i] / (T(1) + X[i] * X[i]);
#endif
  }
}

} // namespace

// Computes the atan gradient on the GPU.
//
// X_dims:  shape of X (and dY/dX); only its element count is used.
// dY_dims: unused — dY is elementwise-aligned with X.
// X, dY:   device pointers to the forward input and incoming gradient.
// dX:      device pointer receiving the result.
// context: supplies the CUDA stream the kernel is enqueued on.
//
// Always returns true; the launch is asynchronous on the context's
// stream, and launch errors surface via Caffe2's CUDA error checks.
template <>
template <typename T>
bool AtanGradientFunctor<CUDAContext>::Forward(
    const std::vector<int>& X_dims,
    const std::vector<int>& /* dY_dims */,
    const T* X,
    const T* dY,
    T* dX,
    CUDAContext* context) const {
  // Total element count = product of dims (1 for an empty/scalar shape).
  const int size = std::accumulate(
      X_dims.cbegin(), X_dims.cend(), 1, std::multiplies<int>());
  AtanGradientCUDAKernel<T>
      <<<CAFFE_GET_BLOCKS(size),
         CAFFE_CUDA_NUM_THREADS,
         0,
         context->cuda_stream()>>>(size, dY, X, dX);
  return true;
}

REGISTER_CUDA_OPERATOR(
    Atan,
    UnaryElementwiseOp<
        TensorTypes<float>,
        CUDAContext,
        AtanFunctor<CUDAContext>>);
REGISTER_CUDA_OPERATOR(
    AtanGradient,
    BinaryElementwiseOp<
        TensorTypes<float>,
        CUDAContext,
        AtanGradientFunctor<CUDAContext>>);

} // namespace caffe2