summaryrefslogtreecommitdiff
path: root/caffe2/utils/math/elementwise.cc
diff options
context:
space:
mode:
Diffstat (limited to 'caffe2/utils/math/elementwise.cc')
-rw-r--r--caffe2/utils/math/elementwise.cc273
1 files changed, 195 insertions, 78 deletions
diff --git a/caffe2/utils/math/elementwise.cc b/caffe2/utils/math/elementwise.cc
index ba6fb96338..f392829ccc 100644
--- a/caffe2/utils/math/elementwise.cc
+++ b/caffe2/utils/math/elementwise.cc
@@ -1,5 +1,8 @@
#include "caffe2/utils/math/elementwise.h"
+#include <algorithm>
+#include <functional>
+
#ifdef CAFFE2_USE_MKL
#include <mkl.h>
#endif // CAFFE2_USE_MKL
@@ -12,7 +15,7 @@ namespace math {
////////////////////////////////////////////////////////////////////////////////
// MKL VML alternatives.
-// Depending on whether we are using MKL, we will delegate the Caffe math
+// Depending on whether we are using MKL, we will delegate the Caffe2 math
// functions that are VML-related to either the VML call or the Eigen
// implementation. If you are setting the flags (such as AVX) right for your CPU
// architecture, usually Eigen will deliver a throughput as fast as the VML
@@ -90,6 +93,22 @@ DELEGATE_POWX_FUNCTION(float, vsPowx)
DELEGATE_POWX_FUNCTION(double, vdPowx)
#undef DELEGATE_POWX_FUNCTION
+#define DELEGATE_SIMPLE_BINARY_FUNCTION(T, Func, MKLFunc) \
+ template <> \
+ C10_EXPORT void Func<T, CPUContext>( \
+ const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
+ MKLFunc(N, A, B, C); \
+ }
+DELEGATE_SIMPLE_BINARY_FUNCTION(float, Add, vsAdd)
+DELEGATE_SIMPLE_BINARY_FUNCTION(double, Add, vdAdd)
+DELEGATE_SIMPLE_BINARY_FUNCTION(float, Sub, vsSub)
+DELEGATE_SIMPLE_BINARY_FUNCTION(double, Sub, vdSub)
+DELEGATE_SIMPLE_BINARY_FUNCTION(float, Mul, vsMul)
+DELEGATE_SIMPLE_BINARY_FUNCTION(double, Mul, vdMul)
+DELEGATE_SIMPLE_BINARY_FUNCTION(float, Div, vsDiv)
+DELEGATE_SIMPLE_BINARY_FUNCTION(double, Div, vdDiv)
+#undef DELEGATE_SIMPLE_BINARY_FUNCTION
+
#else // CAFFE2_USE_MKL
#define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Func, EigenFunc) \
@@ -127,68 +146,85 @@ DELEGATE_SIMPLE_UNARY_FUNCTION(float, Inv, inverse)
DELEGATE_SIMPLE_UNARY_FUNCTION(double, Inv, inverse)
#undef DELEGATE_SIMPLE_UNARY_FUNCTION
-#define DELEGATE_SINH_FUNCTION(T) \
+#define CAFFE2_SPECIALIZED_SINH(T) \
template <> \
C10_EXPORT void Sinh<T, CPUContext>( \
const int N, const T* X, T* Y, CPUContext* /* context */) { \
ConstEigenVectorArrayMap<T> X_arr(X, N); \
EigenVectorArrayMap<T>(Y, N) = (X_arr.exp() - (-X_arr).exp()) / T(2); \
}
-DELEGATE_SINH_FUNCTION(float)
-DELEGATE_SINH_FUNCTION(double)
-#undef DELEGATE_SINH_FUNCTION
+CAFFE2_SPECIALIZED_SINH(float)
+CAFFE2_SPECIALIZED_SINH(double)
+#undef CAFFE2_SPECIALIZED_SINH
-#define DELEGATE_COSH_FUNCTION(T) \
+#define CAFFE2_SPECIALIZED_COSH(T) \
template <> \
C10_EXPORT void Cosh<T, CPUContext>( \
const int N, const T* X, T* Y, CPUContext* /* context */) { \
ConstEigenVectorArrayMap<T> X_arr(X, N); \
EigenVectorArrayMap<T>(Y, N) = (X_arr.exp() + (-X_arr).exp()) / T(2); \
}
-DELEGATE_COSH_FUNCTION(float)
-DELEGATE_COSH_FUNCTION(double)
-#undef DELEGATE_COSH_FUNCTION
+CAFFE2_SPECIALIZED_COSH(float)
+CAFFE2_SPECIALIZED_COSH(double)
+#undef CAFFE2_SPECIALIZED_COSH
-#define DELEGATE_SINCOS_FUNCTION(T) \
+#define CAFFE2_SPECIALIZED_SINCOS(T) \
template <> \
C10_EXPORT void SinCos<T, CPUContext>( \
const int N, const T* X, T* S, T* C, CPUContext* /* context */) { \
EigenVectorArrayMap<T>(S, N) = ConstEigenVectorArrayMap<T>(X, N).sin(); \
EigenVectorArrayMap<T>(C, N) = ConstEigenVectorArrayMap<T>(X, N).cos(); \
}
-DELEGATE_SINCOS_FUNCTION(float)
-DELEGATE_SINCOS_FUNCTION(double)
-#undef DELEGATE_SINCOS_FUNCTION
+CAFFE2_SPECIALIZED_SINCOS(float)
+CAFFE2_SPECIALIZED_SINCOS(double)
+#undef CAFFE2_SPECIALIZED_SINCOS
-#define DELEGATE_POWX_FUNCTION(T) \
+#define CAFFE2_SPECIALIZED_POWX(T) \
template <> \
C10_EXPORT void Powx<T, CPUContext>( \
const int N, const T* A, const T b, T* Y, CPUContext* /* context */) { \
EigenVectorArrayMap<T>(Y, N) = ConstEigenVectorArrayMap<T>(A, N).pow(b); \
}
-DELEGATE_POWX_FUNCTION(float)
-DELEGATE_POWX_FUNCTION(double)
-#undef DELEGATE_POWX_FUNCTION
+CAFFE2_SPECIALIZED_POWX(float)
+CAFFE2_SPECIALIZED_POWX(double)
+#undef CAFFE2_SPECIALIZED_POWX
-#define DELEGATE_CBRT_FUNCTION(T) \
+#define CAFFE2_SPECIALIZED_CBRT(T) \
template <> \
C10_EXPORT void Cbrt<T, CPUContext>( \
const int N, const T* X, T* Y, CPUContext* /* context */) { \
std::transform(X, X + N, Y, [](const T x) { return cbrt(x); }); \
}
-DELEGATE_CBRT_FUNCTION(float)
-DELEGATE_CBRT_FUNCTION(double)
-#undef DELEGATE_CBRT_FUNCTION
+CAFFE2_SPECIALIZED_CBRT(float)
+CAFFE2_SPECIALIZED_CBRT(double)
+#undef CAFFE2_SPECIALIZED_CBRT
-#define DELEGATE_ERF_FUNCTION(T) \
+#define CAFFE2_SPECIALIZED_ERF(T) \
template <> \
C10_EXPORT void Erf<T, CPUContext>( \
const int N, const T* X, T* Y, CPUContext* /* context */) { \
std::transform(X, X + N, Y, [](const T x) { return erf(x); }); \
}
-DELEGATE_ERF_FUNCTION(float)
-DELEGATE_ERF_FUNCTION(double)
-#undef DELEGATE_ERF_FUNCTION
+CAFFE2_SPECIALIZED_ERF(float)
+CAFFE2_SPECIALIZED_ERF(double)
+#undef CAFFE2_SPECIALIZED_ERF
+
+#define DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(T, Func, EigenOp) \
+ template <> \
+ C10_EXPORT void Func<T, CPUContext>( \
+ const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
+ EigenVectorMap<T>(C, N) = ConstEigenVectorArrayMap<T>(A, N) \
+ EigenOp ConstEigenVectorArrayMap<T>(B, N); \
+ }
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(float, Add, +)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(double, Add, +)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(float, Sub, -)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(double, Sub, -)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(float, Mul, *)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(double, Mul, *)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(float, Div, /)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(double, Div, /)
+#undef DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR
#endif // CAFFE2_USE_MKL
@@ -202,74 +238,155 @@ DELEGATE_ERF_FUNCTION(double)
// Eigen's Tanh implementation is faster than MKL, so use Eigen here.
DELEGATE_SIMPLE_UNARY_FUNCTION(float, Tanh, tanh)
DELEGATE_SIMPLE_UNARY_FUNCTION(double, Tanh, tanh)
-DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sign, sign)
-DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sign, sign)
DELEGATE_SIMPLE_UNARY_FUNCTION(std::int32_t, Sign, sign)
DELEGATE_SIMPLE_UNARY_FUNCTION(std::int64_t, Sign, sign)
+DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sign, sign)
+DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sign, sign)
DELEGATE_SIMPLE_UNARY_FUNCTION(std::int32_t, Abs, abs)
DELEGATE_SIMPLE_UNARY_FUNCTION(std::int64_t, Abs, abs)
-DELEGATE_SIMPLE_UNARY_FUNCTION(float, Cube, cube)
-DELEGATE_SIMPLE_UNARY_FUNCTION(double, Cube, cube)
DELEGATE_SIMPLE_UNARY_FUNCTION(std::int32_t, Cube, cube)
DELEGATE_SIMPLE_UNARY_FUNCTION(std::int64_t, Cube, cube)
+DELEGATE_SIMPLE_UNARY_FUNCTION(float, Cube, cube)
+DELEGATE_SIMPLE_UNARY_FUNCTION(double, Cube, cube)
#undef DELEGATE_SIMPLE_UNARY_FUNCTION
-#define DELEGATE_NEG_FUNCTION(T) \
+#define CAFFE2_SPECIALIZED_NEG(T) \
template <> \
C10_EXPORT void Neg<T, CPUContext>( \
const int N, const T* X, T* Y, CPUContext* /* context */) { \
EigenVectorArrayMap<T>(Y, N) = -ConstEigenVectorArrayMap<T>(X, N); \
}
-DELEGATE_NEG_FUNCTION(float)
-DELEGATE_NEG_FUNCTION(double)
-DELEGATE_NEG_FUNCTION(std::int32_t)
-DELEGATE_NEG_FUNCTION(std::int64_t)
-#undef DELEGATE_NEG_FUNCTION
+CAFFE2_SPECIALIZED_NEG(std::int32_t)
+CAFFE2_SPECIALIZED_NEG(std::int64_t)
+CAFFE2_SPECIALIZED_NEG(float)
+CAFFE2_SPECIALIZED_NEG(double)
+#undef CAFFE2_SPECIALIZED_NEG
+
+#define DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(T, Func, EigenOp) \
+ template <> \
+ C10_EXPORT void Func<T, CPUContext>( \
+ const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
+ EigenVectorMap<T>(C, N) = ConstEigenVectorArrayMap<T>(A, N) \
+ EigenOp ConstEigenVectorArrayMap<T>(B, N); \
+ }
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, Add, +)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, Add, +)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, Sub, -)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, Sub, -)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, Mul, *)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, Mul, *)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, Div, /)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, Div, /)
+#undef DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR
-#define CAFFE2_SPECIALIZED_AFFINE_CHANNEL(T) \
- template <> \
- void AffineChannel<T, CPUContext, StorageOrder::NCHW>( \
- const int N, \
- const int C, \
- const int HxW, \
- const T* X, \
- const T* scale, \
- const T* bias, \
- T* Y, \
- CPUContext* /* context */) { \
- ConstEigenVectorArrayMap<T> scale_arr(scale, C); \
- ConstEigenVectorArrayMap<T> bias_arr(bias, C); \
- const int stride = C * HxW; \
- const T* X_ptr = X; \
- T* Y_ptr = Y; \
- for (int i = 0; i < N; ++i) { \
- EigenArrayMap<T>(Y_ptr, HxW, C) = \
- (ConstEigenArrayMap<T>(X_ptr, HxW, C).rowwise() * \
- scale_arr.transpose()) \
- .rowwise() + \
- bias_arr.transpose(); \
- X_ptr += stride; \
- Y_ptr += stride; \
- } \
- } \
- template <> \
- void AffineChannel<T, CPUContext, StorageOrder::NHWC>( \
- const int N, \
- const int C, \
- const int HxW, \
- const T* X, \
- const T* scale, \
- const T* bias, \
- T* Y, \
- CPUContext* /* context */) { \
- EigenArrayMap<T>(Y, C, N * HxW) = \
- (ConstEigenArrayMap<T>(X, C, N * HxW).colwise() * \
- ConstEigenVectorArrayMap<T>(scale, C)) \
- .colwise() + \
- ConstEigenVectorArrayMap<T>(bias, C); \
+#define DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(T, Func, EigenFunc) \
+ template <> \
+ C10_EXPORT void Func<T, CPUContext>( \
+ const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
+ EigenVectorMap<T>(C, N) = ConstEigenVectorArrayMap<T>(A, N).EigenFunc( \
+ ConstEigenVectorArrayMap<T>(B, N)); \
+ }
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(float, Min, min)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(double, Min, min)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(float, Max, max)
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(double, Max, max)
+#undef DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION
+
+#define DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(T, Func, StdFunc) \
+ template <> \
+ C10_EXPORT void Func<T, CPUContext>( \
+ const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
+ std::transform(A, A + N, B, C, StdFunc); \
+ }
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ bool,
+ And,
+ std::logical_and<bool>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ bool,
+ Or,
+ std::logical_or<bool>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(bool, Xor, std::bit_xor<bool>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ bool,
+ BitwiseAnd,
+ std::bit_and<bool>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ std::int32_t,
+ BitwiseAnd,
+ std::bit_and<std::int32_t>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ std::int64_t,
+ BitwiseAnd,
+ std::bit_and<std::int64_t>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ bool,
+ BitwiseOr,
+ std::bit_or<bool>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ std::int32_t,
+ BitwiseOr,
+ std::bit_or<std::int32_t>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ std::int64_t,
+ BitwiseOr,
+ std::bit_or<std::int64_t>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ bool,
+ BitwiseXor,
+ std::bit_xor<bool>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ std::int32_t,
+ BitwiseXor,
+ std::bit_xor<std::int32_t>())
+DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
+ std::int64_t,
+ BitwiseXor,
+ std::bit_xor<std::int64_t>())
+#undef DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION
+
+#define DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(T, Func, EigenOp) \
+ template <> \
+ C10_EXPORT void Func<T, CPUContext>( \
+ const int N, \
+ const T* A, \
+ const T* B, \
+ bool* C, \
+ CPUContext* /* context */) { \
+ EigenVectorArrayMap<bool>(C, N) = ConstEigenVectorArrayMap<T>(A, N) \
+ EigenOp ConstEigenVectorArrayMap<T>(B, N); \
}
-CAFFE2_SPECIALIZED_AFFINE_CHANNEL(float)
-#undef CAFFE2_SPECIALIZED_AFFINE_CHANNEL
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, EQ, ==)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, EQ, ==)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, EQ, ==)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, EQ, ==)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, EQ, ==)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, NE, !=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, NE, !=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, NE, !=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, NE, !=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, NE, !=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, LT, <)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, LT, <)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, LT, <)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, LT, <)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, LT, <)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, LE, <=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, LE, <=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, LE, <=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, LE, <=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, LE, <=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, GT, >)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, GT, >)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, GT, >)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, GT, >)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, GT, >)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, GE, >=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, GE, >=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, GE, >=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, GE, >=)
+DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, GE, >=)
+#undef DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR
} // namespace math
} // namespace caffe2