#include "caffe2/operators/elu_op.h"

#include <algorithm>
#include <functional>
#include <numeric>
#include <string>
#include <vector>

#include "caffe2/utils/eigen_utils.h"

namespace caffe2 {

// Forward pass of ELU on CPU.
//
// For each of the N elements:
//   Y[i] = X[i]                      if X[i] >= 0
//   Y[i] = alpha * (exp(X[i]) - 1)   if X[i] <  0
//
// The comparison is `X < 0`, so any element for which that test is false
// (including zero) is passed through unchanged. The context argument is
// unused. Always returns true.
//
// NOTE(review): the template argument lists in this file (<typename T>,
// <CPUContext>, <int>, <float>, ...) must agree with the declarations in
// elu_op.h -- confirm against that header.
template <>
template <typename T>
bool EluFunctor<CPUContext>::
operator()(const int N, const T* X, T* Y, CPUContext* /* context */) const {
  ConstEigenVectorArrayMap<T> X_arr(X, N);
  // Written through an array map (as the gradient below does) so that the
  // coefficient-wise expression is assigned without an array->matrix
  // conversion.
  EigenVectorArrayMap<T>(Y, N) =
      (X_arr < 0).select(alpha * (X_arr.exp() - T(1)), X_arr);
  return true;
}

// Backward pass of ELU on CPU, expressed in terms of the forward output Y.
//
// For each element:
//   dX[i] = dY[i] * (Y[i] + alpha)   if Y[i] <  0
//   dX[i] = dY[i]                    otherwise
//
// On the negative branch Y = alpha * (exp(X) - 1), hence
// Y + alpha = alpha * exp(X), which is dY/dX; this is why X itself is not
// needed here.
//
// The element count is the product of the entries of Y_dims (1 for an empty
// dims vector); dY_dims and the context are unused. Always returns true.
//
// NOTE(review): the product is accumulated in `int`; presumably tensor sizes
// handled here fit in that range -- verify against callers.
template <>
template <typename T>
bool EluGradientFunctor<CPUContext>::Forward(
    const std::vector<int>& Y_dims,
    const std::vector<int>& /* dY_dims */,
    const T* Y,
    const T* dY,
    T* dX,
    CPUContext* /* context */) const {
  const int size = std::accumulate(
      Y_dims.cbegin(), Y_dims.cend(), 1, std::multiplies<int>());
  ConstEigenVectorArrayMap<T> Y_arr(Y, size);
  ConstEigenVectorArrayMap<T> dY_arr(dY, size);
  EigenVectorArrayMap<T>(dX, size) =
      (Y_arr < 0).select(dY_arr * (Y_arr + alpha), dY_arr);
  return true;
}

REGISTER_CPU_OPERATOR(
    Elu,
    UnaryElementwiseWithArgsOp<
        TensorTypes<float>,
        CPUContext,
        EluFunctor<CPUContext>>);
REGISTER_CPU_GRADIENT_OPERATOR(
    EluGradient,
    BinaryElementwiseWithArgsOp<
        TensorTypes<float>,
        CPUContext,
        EluGradientFunctor<CPUContext>>);

// Input: X, output: Y
OPERATOR_SCHEMA(Elu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(

This op implements the exponential linear unit (ELU) activation function as described in [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](https://arxiv.org/abs/1511.07289). The op takes an input tensor $X$ of arbitrary shape, computes the elementwise elu operation, and returns a vector $Y$ of the same shape as output. The alpha parameter may be passed as an argument, but defaults to 1. The elu operation is defined as

$$y=f(x) =\begin{cases}\alpha(e^x-1) & x < 0 \\ x & otherwise\end{cases}$$

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elu_op.cc

Example

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Elu",
    ["X"],
    ["Y"],
    alpha=1.1
)

workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")

workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
 [[ 0.35339102  1.1860217  -0.10710736]
 [-3.1173866  -0.1889988  -0.20330353]
 [ 1.8525308  -0.368949    0.506277  ]]

Y:
 [[ 0.35339102  1.1860217  -0.11172786]
 [-1.0513     -0.18943374 -0.20236646]
 [ 1.8525308  -0.33939326  0.506277  ]]

```

)DOC")
    .Input(0, "X", "1D input tensor of data to be operated on.")
    .Output(0, "Y", "1D output tensor, calculated as described above.")
    .Arg(
        "alpha",
        "*(type: float; default: 1.0)* Defines alpha parameter used in calculation.")
    .InheritOnnxSchema();

// Input: Y, dY, output: dX
GRADIENT_OPERATOR_SCHEMA(EluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
EluGradient takes both Y and dY and uses this to update dX according to the
chain rule and derivatives of the exponential linear unit (ELU) function.
)DOC");

namespace {

// Gradient maker for Elu: emits a single "<op type>Gradient" operator that
// consumes the forward output O(0) and its gradient GO(0), and produces the
// gradient of the forward input GI(0).
class GetEluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  std::vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        std::vector<std::string>{O(0), GO(0)},
        std::vector<std::string>{GI(0)});
  }
};

} // namespace

REGISTER_GRADIENT(Elu, GetEluGradient);

} // namespace caffe2