#include "caffe2/operators/pow_op.h" #include "caffe2/utils/eigen_utils.h" #include "caffe2/utils/math.h" // definition of NumericTypes and SameTypeAsInput is in below header file //#include "caffe2/operators/elementwise_op.h" #include namespace caffe2 { #define EIGEN_POW(x, y) (x.pow(y)) struct EigenPowFunctor { template inline void Run(size_t n, const T1* a, const T2* b, T2 e, R* out, CPUContext*) { if (b == NULL) { EigenVectorArrayMap(out, n) = EIGEN_POW((ConstEigenVectorArrayMap(a, n)), (e)); } else { if (b_is_scalar) { EigenVectorArrayMap(out, n) = EIGEN_POW((ConstEigenVectorArrayMap(a, n)), (b[0])); } else { EigenVectorArrayMap(out, n) = EIGEN_POW( (ConstEigenVectorArrayMap(a, n)), (ConstEigenVectorArrayMap(b, n))); } } } template void RunWithBroadcast( const T1* a, const T2* b, R* out, size_t pre, size_t n, CPUContext*) { EigenArrayMap(out, n, pre) = EIGEN_POW( (ConstEigenArrayMap(a, n, pre)), (ConstEigenVectorArrayMap(b, n)).rowwise().replicate(pre)); /* //below code only allows elementary ops, such as +, -, * and /, //and does not allow operations, such as pow, exp and log EIGEN_POW( (ConstEigenArrayMap(a, n, pre).colwise()), (ConstEigenVectorArrayMap(b, n))); */ } template void RunWithBroadcast2( const T1* a, const T2* b, R* out, size_t pre, size_t n, size_t post, CPUContext*) { for (int i = 0; i < pre; ++i) { EigenArrayMap(out + i * n * post, post, n) = EIGEN_POW( (ConstEigenArrayMap(a + i * n * post, post, n)), (Eigen::Map>(b, n)) .colwise() .replicate(post)); /* //below code only allows elementary ops, such as +, -, * and /, //and does not allow for operations, such as pow, exp and log EIEGN_POW( (ConstEigenArrayMap(a + i * n * post, post, n).rowwise()), (Eigen::Map>(b, n))); */ } } }; REGISTER_CPU_OPERATOR( Pow, PowOp< TensorTypes /*NumericTypes*/, CPUContext, EigenPowFunctor, SameTypeAsInput>) OPERATOR_SCHEMA(Pow) .NumInputs(1, 2) .NumOutputs(1) .AllowInplace({{0, 0}, {1, 0}}) .IdenticalTypeAndShapeOfInput(0) .SetDoc(R"DOC( The *Pow* op takes an input data tensor $X$ and an exponent parameter *exponent*, which can be a scalar or another tensor. As output, it produces a single output data tensor $Y$, where the function $f(x) = x^{exponent}$ has been applied to $X$ elementwise. Github Links: - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pow_op.h - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pow_op.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Pow",
    ["X", "exponent"],
    ["Y"],
    broadcast=1
)

workspace.FeedBlob("X", np.array([1,2,3,4,5,6]).astype(np.float32))
print("X: ", workspace.FetchBlob("X"))

workspace.FeedBlob("exponent", np.array([2]).astype(np.float32))
print("exponent: ", workspace.FetchBlob("exponent"))

workspace.RunOperatorOnce(op)
print("Y: ", workspace.FetchBlob("Y"))

```

**Result**

```

X:  [1. 2. 3. 4. 5. 6.]
exponent:  [2.]
Y:  [ 1.  4.  9. 16. 25. 36.]

```

</details>
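
The exponent may instead be supplied through the *exponent* argument, in which case the second input is omitted (the two must not be combined). A minimal sketch of that form, assuming the same workspace and numpy setup as the example above:

```

workspace.ResetWorkspace()

# Sketch: exponent passed as an operator argument rather than an input blob.
op = core.CreateOperator(
    "Pow",
    ["X"],
    ["Y"],
    exponent=2.0
)

workspace.FeedBlob("X", np.array([1,2,3,4,5,6]).astype(np.float32))
workspace.RunOperatorOnce(op)
print("Y: ", workspace.FetchBlob("Y"))  # expected: [ 1.  4.  9. 16. 25. 36.]

```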
)DOC") .Input(0, "X", "Input data blob to be operated on.") .Input(1, "exponent", "Exponent blob containing the exponent(s) for calculation. Do not use if setting exponent via argument.") .Output(0, "Y", "Output data blob with the same shape as the input.") .Arg("exponent", "The exponent of the power function. Do not use if setting exponent via input.") .Arg("axis", "*(type: int; default: -1)*") .Arg("broadcast", "*(type: bool; default: False)*"); class GetPowGradient : public GradientMakerBase { using GradientMakerBase::GradientMakerBase; vector GetGradientDefs() override { ArgumentHelper arg_helper(def_); if (arg_helper.HasArgument("exponent")) { // second input is a scalar // function f(w,a) = w^a // gradient operator with respect to first input tensor // df/dw = a * w^(a-1) (all operations are component-wise) float exponent = arg_helper.GetSingleArgument("exponent", 0.0); Argument scale_arg; scale_arg.set_name("scale"); scale_arg.set_f(exponent); Argument pow_arg; pow_arg.set_name("exponent"); if (I(0) != O(0)) { pow_arg.set_f(exponent - 1); } else { LOG(WARNING) << "In-place Pow gradient, possible loss of precision"; constexpr float kEps = 1e-12f; CAFFE_ENFORCE(std::fabs(exponent) > kEps); pow_arg.set_f((exponent - 1) / exponent); } return vector{CreateOperatorDef( "Pow", "", std::vector{I(0)}, std::vector{GI(0)}, std::vector{pow_arg}), CreateOperatorDef( "Mul", "", std::vector{GI(0), GO(0)}, std::vector{GI(0)}), CreateOperatorDef( "Scale", "", std::vector{GI(0)}, std::vector{GI(0)}, std::vector{scale_arg})}; /* // Alternative gradient computation return vector{CreateOperatorDef( "Div", "", std::vector{O(0), I(0)}, std::vector{GI(0)}), CreateOperatorDef( "Mul", "", std::vector{GI(0), GO(0)}, std::vector{GI(0)}), CreateOperatorDef( "Scale", "", std::vector{GI(0)}, std::vector{GI(0)}, std::vector{scale_arg})}; */ } else { // second input is a tensor CAFFE_ENFORCE( Def().input(0) != Def().output(0) && Def().input(1) != Def().output(0), "Gradient computation cannot be carried out if Pow uses in-place " "computation: ", ProtoDebugString(Def())); vector grad_ops; Argument one_arg; one_arg.set_name("value"); one_arg.set_f(1); Argument broadcast, axis, axis_str, order; bool bflag = ArgumentHelper::HasArgument(Def(), "broadcast"); if (bflag) { if (ArgumentHelper::HasArgument(Def(), "broadcast")) { broadcast = GetArgument(Def(), "broadcast"); } else { broadcast = MakeArgument("broadcast", 0); } if (ArgumentHelper::HasArgument(Def(), "axis")) { axis = GetArgument(Def(), "axis"); } else { axis = MakeArgument("axis", -1); } if (ArgumentHelper::HasArgument(Def(), "axis_str")) { axis_str = GetArgument(Def(), "axis_str"); } else { axis_str = MakeArgument("axis_str", ""); } if (ArgumentHelper::HasArgument(Def(), "order")) { order = GetArgument(Def(), "order"); } else { order = MakeArgument("order", "NCHW"); } } // function f(w,a) = w^a // gradient operator with respect to first input tensor // df/dw = a * w^(a-1) (all operations are component-wise) grad_ops.push_back(CreateOperatorDef( "ConstantFill", "", std::vector{I(1)}, std::vector{GI(1)}, std::vector{one_arg})); grad_ops.push_back(CreateOperatorDef( "Sub", "", std::vector{I(1), GI(1)}, std::vector{GI(1)})); if (bflag) { grad_ops.push_back(CreateOperatorDef( "Pow", "", std::vector{I(0), GI(1)}, std::vector{GI(0)}, vector{broadcast, axis, axis_str, order})); } else { grad_ops.push_back(CreateOperatorDef( "Pow", "", std::vector{I(0), GI(1)}, std::vector{GI(0)})); } grad_ops.push_back(CreateOperatorDef( "Mul", "", std::vector{GI(0), GO(0)}, 
          std::vector<string>{GI(0)}));
      if (bflag) {
        grad_ops.push_back(CreateOperatorDef(
            "Mul",
            "",
            std::vector<string>{GI(0), I(1)},
            std::vector<string>{GI(0)},
            vector<Argument>{broadcast, axis, axis_str, order}));
      } else {
        grad_ops.push_back(CreateOperatorDef(
            "Mul",
            "",
            std::vector<string>{GI(0), I(1)},
            std::vector<string>{GI(0)}));
      }
      /*
      // Alternative gradient computation (no broadcast support)
      grad_ops.push_back(CreateOperatorDef(
          "Div",
          "",
          std::vector<string>{O(0), I(0)},
          std::vector<string>{GI(0)}));
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "",
          std::vector<string>{GI(0), GO(0)},
          std::vector<string>{GI(0)}));
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "",
          std::vector<string>{GI(0), I(1)},
          std::vector<string>{GI(0)}));
      */
      // gradient operator with respect to the second input tensor:
      // df/da = w^a * ln w (all operations are component-wise)
      /*
      // reset GI(1) to zero
      Argument zero_arg;
      zero_arg.set_name("value");
      zero_arg.set_f(0);
      grad_ops.push_back(CreateOperatorDef(
          "ConstantFill",
          "",
          std::vector<string>{I(1)},
          std::vector<string>{GI(1)},
          std::vector<Argument>{zero_arg}));
      */
      grad_ops.push_back(CreateOperatorDef(
          "Log",
          "",
          std::vector<string>{I(0)},
          std::vector<string>{GI(1) + "_autogen_pre_red"}));
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "",
          std::vector<string>{GI(1) + "_autogen_pre_red", O(0)},
          std::vector<string>{GI(1) + "_autogen_pre_red"}));
      if (bflag) {
        grad_ops.push_back(CreateOperatorDef(
            "Mul",
            "",
            std::vector<string>{GI(1) + "_autogen_pre_red", GO(0)},
            std::vector<string>{GI(1) + "_autogen_pre_red"}));
        grad_ops.push_back(CreateOperatorDef(
            "SumReduceLike",
            "",
            vector<string>{GI(1) + "_autogen_pre_red", I(1)},
            vector<string>{GI(1)},
            vector<Argument>{axis, axis_str, order}));
      } else {
        grad_ops.push_back(CreateOperatorDef(
            "Mul",
            "",
            std::vector<string>{GI(1) + "_autogen_pre_red", GO(0)},
            std::vector<string>{GI(1)}));
      }
      return grad_ops;
    }
  }

  // Argument `shape` is no longer needed in backprop.
  bool CopyArguments() const override {
    return false;
  }
};

REGISTER_GRADIENT(Pow, GetPowGradient);

} // namespace caffe2