Diffstat (limited to 'compiler/nnc/backends/soft_backend/code_snippets/eigen.def')
-rw-r--r--  compiler/nnc/backends/soft_backend/code_snippets/eigen.def  29033
1 file changed, 29033 insertions, 0 deletions
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/eigen.def b/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
new file mode 100644
index 000000000..b02f84bed
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
@@ -0,0 +1,29033 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// #include "src/Core/util/DisableStupidWarnings.h"
+#ifndef EIGEN_WARNINGS_DISABLED
+#define EIGEN_WARNINGS_DISABLED
+#ifdef _MSC_VER
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma warning( push )
+ #endif
+ #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
+#elif defined __INTEL_COMPILER
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma warning push
+ #endif
+ #pragma warning disable 2196 279 1684 2259
+#elif defined __clang__
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma clang diagnostic push
+ #endif
+ #pragma clang diagnostic ignored "-Wconstant-logical-operand"
+#elif defined __GNUC__ && __GNUC__>=6
+ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #pragma GCC diagnostic push
+ #endif
+ #pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
+#if defined __NVCC__
+ #pragma diag_suppress code_is_unreachable
+ #pragma diag_suppress initialization_not_reachable
+ #pragma diag_suppress 1222
+ #pragma diag_suppress 2527
+ #pragma diag_suppress 2529
+ #pragma diag_suppress 2651
+ #pragma diag_suppress 2653
+ #pragma diag_suppress 2668
+ #pragma diag_suppress 2669
+ #pragma diag_suppress 2670
+ #pragma diag_suppress 2671
+ #pragma diag_suppress 2735
+ #pragma diag_suppress 2737
+#endif
+#endif
+// end #include "src/Core/util/DisableStupidWarnings.h"
+#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
+ #ifndef EIGEN_NO_DEBUG
+ #define EIGEN_NO_DEBUG
+ #endif
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ #undef EIGEN_INTERNAL_DEBUGGING
+ #endif
+ #ifdef EIGEN_EXCEPTIONS
+ #undef EIGEN_EXCEPTIONS
+ #endif
+ #ifdef __CUDACC__
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+ #define EIGEN_DEVICE_FUNC __host__ __device__
+ #include <math_functions.hpp>
+ #else
+ #define EIGEN_DEVICE_FUNC
+ #endif
+#else
+ #define EIGEN_DEVICE_FUNC
+#endif
+#if defined(__CUDA_ARCH__) && defined(__NVCC__)
+ #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
+#else
+ #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
+#endif
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
+ #define EIGEN_EXCEPTIONS
+#endif
+#ifdef EIGEN_EXCEPTIONS
+ #include <new>
+#endif
+// #include "src/Core/util/Macros.h"
+#ifndef EIGEN_MACROS_H
+#define EIGEN_MACROS_H
+#define EIGEN_WORLD_VERSION 3
+#define EIGEN_MAJOR_VERSION 3
+#define EIGEN_MINOR_VERSION 4
+#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
+ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
+ EIGEN_MINOR_VERSION>=z))))
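+// Illustrative usage (not part of upstream Eigen): client code can gate on the
+// bundled Eigen release with this macro, e.g.
+//   #if EIGEN_VERSION_AT_LEAST(3,3,0)   // true for this 3.3.4 snapshot
+//     // use Eigen 3.3 APIs
+//   #endif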
+#ifdef __GNUC__
+ #define EIGEN_COMP_GNUC 1
+#else
+ #define EIGEN_COMP_GNUC 0
+#endif
+#if defined(__clang__)
+ #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__)
+#else
+ #define EIGEN_COMP_CLANG 0
+#endif
+#if defined(__llvm__)
+ #define EIGEN_COMP_LLVM 1
+#else
+ #define EIGEN_COMP_LLVM 0
+#endif
+#if defined(__INTEL_COMPILER)
+ #define EIGEN_COMP_ICC __INTEL_COMPILER
+#else
+ #define EIGEN_COMP_ICC 0
+#endif
+#if defined(__MINGW32__)
+ #define EIGEN_COMP_MINGW 1
+#else
+ #define EIGEN_COMP_MINGW 0
+#endif
+#if defined(__SUNPRO_CC)
+ #define EIGEN_COMP_SUNCC 1
+#else
+ #define EIGEN_COMP_SUNCC 0
+#endif
+#if defined(_MSC_VER)
+ #define EIGEN_COMP_MSVC _MSC_VER
+#else
+ #define EIGEN_COMP_MSVC 0
+#endif
+#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)
+ #define EIGEN_COMP_MSVC_STRICT _MSC_VER
+#else
+ #define EIGEN_COMP_MSVC_STRICT 0
+#endif
+#if defined(__IBMCPP__) || defined(__xlc__)
+ #define EIGEN_COMP_IBM 1
+#else
+ #define EIGEN_COMP_IBM 0
+#endif
+#if defined(__PGI)
+ #define EIGEN_COMP_PGI 1
+#else
+ #define EIGEN_COMP_PGI 0
+#endif
+#if defined(__CC_ARM) || defined(__ARMCC_VERSION)
+ #define EIGEN_COMP_ARM 1
+#else
+ #define EIGEN_COMP_ARM 0
+#endif
+#if defined(__EMSCRIPTEN__)
+ #define EIGEN_COMP_EMSCRIPTEN 1
+#else
+ #define EIGEN_COMP_EMSCRIPTEN 0
+#endif
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN)
+ #define EIGEN_COMP_GNUC_STRICT 1
+#else
+ #define EIGEN_COMP_GNUC_STRICT 0
+#endif
+#if EIGEN_COMP_GNUC
+ #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
+ #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
+ #define EIGEN_GNUC_AT(x,y) ( __GNUC__==x && __GNUC_MINOR__==y )
+#else
+ #define EIGEN_GNUC_AT_LEAST(x,y) 0
+ #define EIGEN_GNUC_AT_MOST(x,y) 0
+ #define EIGEN_GNUC_AT(x,y) 0
+#endif
+#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
+#define EIGEN_GCC3_OR_OLDER 1
+#else
+#define EIGEN_GCC3_OR_OLDER 0
+#endif
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
+ #define EIGEN_ARCH_x86_64 1
+#else
+ #define EIGEN_ARCH_x86_64 0
+#endif
+#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
+ #define EIGEN_ARCH_i386 1
+#else
+ #define EIGEN_ARCH_i386 0
+#endif
+#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386
+ #define EIGEN_ARCH_i386_OR_x86_64 1
+#else
+ #define EIGEN_ARCH_i386_OR_x86_64 0
+#endif
+#if defined(__arm__)
+ #define EIGEN_ARCH_ARM 1
+#else
+ #define EIGEN_ARCH_ARM 0
+#endif
+#if defined(__aarch64__)
+ #define EIGEN_ARCH_ARM64 1
+#else
+ #define EIGEN_ARCH_ARM64 0
+#endif
+#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
+ #define EIGEN_ARCH_ARM_OR_ARM64 1
+#else
+ #define EIGEN_ARCH_ARM_OR_ARM64 0
+#endif
+#if defined(__mips__) || defined(__mips)
+ #define EIGEN_ARCH_MIPS 1
+#else
+ #define EIGEN_ARCH_MIPS 0
+#endif
+#if defined(__sparc__) || defined(__sparc)
+ #define EIGEN_ARCH_SPARC 1
+#else
+ #define EIGEN_ARCH_SPARC 0
+#endif
+#if defined(__ia64__)
+ #define EIGEN_ARCH_IA64 1
+#else
+ #define EIGEN_ARCH_IA64 0
+#endif
+#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
+ #define EIGEN_ARCH_PPC 1
+#else
+ #define EIGEN_ARCH_PPC 0
+#endif
+#if defined(__unix__) || defined(__unix)
+ #define EIGEN_OS_UNIX 1
+#else
+ #define EIGEN_OS_UNIX 0
+#endif
+#if defined(__linux__)
+ #define EIGEN_OS_LINUX 1
+#else
+ #define EIGEN_OS_LINUX 0
+#endif
+#if defined(__ANDROID__) || defined(ANDROID)
+ #define EIGEN_OS_ANDROID 1
+#else
+ #define EIGEN_OS_ANDROID 0
+#endif
+#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)
+ #define EIGEN_OS_GNULINUX 1
+#else
+ #define EIGEN_OS_GNULINUX 0
+#endif
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)
+ #define EIGEN_OS_BSD 1
+#else
+ #define EIGEN_OS_BSD 0
+#endif
+#if defined(__APPLE__)
+ #define EIGEN_OS_MAC 1
+#else
+ #define EIGEN_OS_MAC 0
+#endif
+#if defined(__QNX__)
+ #define EIGEN_OS_QNX 1
+#else
+ #define EIGEN_OS_QNX 0
+#endif
+#if defined(_WIN32)
+ #define EIGEN_OS_WIN 1
+#else
+ #define EIGEN_OS_WIN 0
+#endif
+#if defined(_WIN64)
+ #define EIGEN_OS_WIN64 1
+#else
+ #define EIGEN_OS_WIN64 0
+#endif
+#if defined(_WIN32_WCE)
+ #define EIGEN_OS_WINCE 1
+#else
+ #define EIGEN_OS_WINCE 0
+#endif
+#if defined(__CYGWIN__)
+ #define EIGEN_OS_CYGWIN 1
+#else
+ #define EIGEN_OS_CYGWIN 0
+#endif
+#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN )
+ #define EIGEN_OS_WIN_STRICT 1
+#else
+ #define EIGEN_OS_WIN_STRICT 0
+#endif
+#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
+ #define EIGEN_OS_SUN 1
+#else
+ #define EIGEN_OS_SUN 0
+#endif
+#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
+ #define EIGEN_OS_SOLARIS 1
+#else
+ #define EIGEN_OS_SOLARIS 0
+#endif
+#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
+#else
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
+#endif
+#define EIGEN_NOT_A_MACRO
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
+#else
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
+#endif
+#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
+#endif
+#ifdef __has_builtin
+# define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define EIGEN_HAS_BUILTIN(x) 0
+#endif
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+#ifndef EIGEN_MAX_CPP_VER
+#define EIGEN_MAX_CPP_VER 99
+#endif
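+// Illustrative note (not part of upstream Eigen): EIGEN_MAX_CPP_VER caps the
+// language features Eigen will use; e.g. building with -DEIGEN_MAX_CPP_VER=03
+// turns off every C++11-and-later path guarded by the checks below.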
+#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900)
+#define EIGEN_HAS_CXX11 1
+#else
+#define EIGEN_HAS_CXX11 0
+#endif
+#ifndef EIGEN_HAS_RVALUE_REFERENCES
+#if EIGEN_MAX_CPP_VER>=11 && \
+ (__has_feature(cxx_rvalue_references) || \
+ (defined(__cplusplus) && __cplusplus >= 201103L) || \
+ (EIGEN_COMP_MSVC >= 1600))
+ #define EIGEN_HAS_RVALUE_REFERENCES 1
+#else
+ #define EIGEN_HAS_RVALUE_REFERENCES 0
+#endif
+#endif
+#ifndef EIGEN_HAS_C99_MATH
+#if EIGEN_MAX_CPP_VER>=11 && \
+ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
+ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
+ || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)))
+ #define EIGEN_HAS_C99_MATH 1
+#else
+ #define EIGEN_HAS_C99_MATH 0
+#endif
+#endif
+#ifndef EIGEN_HAS_STD_RESULT_OF
+#if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)))
+#define EIGEN_HAS_STD_RESULT_OF 1
+#else
+#define EIGEN_HAS_STD_RESULT_OF 0
+#endif
+#endif
+#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
+#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
+ && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
+#define EIGEN_HAS_VARIADIC_TEMPLATES 1
+#else
+#define EIGEN_HAS_VARIADIC_TEMPLATES 0
+#endif
+#endif
+#ifndef EIGEN_HAS_CONSTEXPR
+#ifdef __CUDACC__
+#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
+ #define EIGEN_HAS_CONSTEXPR 1
+#endif
+#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
+ (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)))
+#define EIGEN_HAS_CONSTEXPR 1
+#endif
+#ifndef EIGEN_HAS_CONSTEXPR
+#define EIGEN_HAS_CONSTEXPR 0
+#endif
+#endif
+#ifndef EIGEN_HAS_CXX11_MATH
+ #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
+ && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))
+ #define EIGEN_HAS_CXX11_MATH 1
+ #else
+ #define EIGEN_HAS_CXX11_MATH 0
+ #endif
+#endif
+#ifndef EIGEN_HAS_CXX11_CONTAINERS
+ #if EIGEN_MAX_CPP_VER>=11 && \
+ ((__cplusplus > 201103L) \
+ || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
+ || EIGEN_COMP_MSVC >= 1900)
+ #define EIGEN_HAS_CXX11_CONTAINERS 1
+ #else
+ #define EIGEN_HAS_CXX11_CONTAINERS 0
+ #endif
+#endif
+#ifndef EIGEN_HAS_CXX11_NOEXCEPT
+ #if EIGEN_MAX_CPP_VER>=11 && \
+ (__has_feature(cxx_noexcept) \
+ || (__cplusplus > 201103L) \
+ || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
+ || EIGEN_COMP_MSVC >= 1900)
+ #define EIGEN_HAS_CXX11_NOEXCEPT 1
+ #else
+ #define EIGEN_HAS_CXX11_NOEXCEPT 0
+ #endif
+#endif
+#ifndef EIGEN_FAST_MATH
+#define EIGEN_FAST_MATH 1
+#endif
+#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
+#define EIGEN_CAT2(a,b) a ## b
+#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
+#define EIGEN_COMMA ,
+#define EIGEN_MAKESTRING2(a) #a
+#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
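+// Illustrative note (not part of upstream Eigen): the two-level indirection makes
+// stringification happen after macro expansion, e.g.
+//   EIGEN_MAKESTRING(EIGEN_WORLD_VERSION)   // yields "3"
+//   EIGEN_MAKESTRING2(EIGEN_WORLD_VERSION)  // yields "EIGEN_WORLD_VERSION"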
+#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
+#define EIGEN_STRONG_INLINE __forceinline
+#else
+#define EIGEN_STRONG_INLINE inline
+#endif
+#if EIGEN_GNUC_AT_LEAST(4,2)
+#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
+#else
+#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
+#endif
+#if EIGEN_COMP_GNUC
+#define EIGEN_DONT_INLINE __attribute__((noinline))
+#elif EIGEN_COMP_MSVC
+#define EIGEN_DONT_INLINE __declspec(noinline)
+#else
+#define EIGEN_DONT_INLINE
+#endif
+#if EIGEN_COMP_GNUC
+#define EIGEN_PERMISSIVE_EXPR __extension__
+#else
+#define EIGEN_PERMISSIVE_EXPR
+#endif
+#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline
+#ifdef NDEBUG
+# ifndef EIGEN_NO_DEBUG
+# define EIGEN_NO_DEBUG
+# endif
+#endif
+#ifdef EIGEN_NO_DEBUG
+ #define eigen_plain_assert(x)
+#else
+ #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
+ namespace Eigen {
+ namespace internal {
+ inline bool copy_bool(bool b) { return b; }
+ }
+ }
+ #define eigen_plain_assert(x) assert(x)
+ #else
+ #include <cstdlib>
+ #include <iostream>
+ namespace Eigen {
+ namespace internal {
+ namespace {
+ EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; }
+ }
+ inline void assert_fail(const char *condition, const char *function, const char *file, int line)
+ {
+ std::cerr << "assertion failed: " << condition << " in function " << function << " at " << file << ":" << line << std::endl;
+ abort();
+ }
+ }
+ }
+ #define eigen_plain_assert(x) \
+ do { \
+ if(!Eigen::internal::copy_bool(x)) \
+ Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \
+ } while(false)
+ #endif
+#endif
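+// Illustrative note (not part of upstream Eigen): on old g++ (see
+// EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO above) the asserted condition is routed
+// through the non-inlined copy_bool so the compiler cannot fold or miscompile the
+// check; either way the call site looks the same:
+//   eigen_plain_assert(ptr != 0);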
+#ifndef eigen_assert
+#define eigen_assert(x) eigen_plain_assert(x)
+#endif
+#ifdef EIGEN_INTERNAL_DEBUGGING
+#define eigen_internal_assert(x) eigen_assert(x)
+#else
+#define eigen_internal_assert(x)
+#endif
+#ifdef EIGEN_NO_DEBUG
+#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x)
+#else
+#define EIGEN_ONLY_USED_FOR_DEBUG(x)
+#endif
+#ifndef EIGEN_NO_DEPRECATED_WARNING
+ #if EIGEN_COMP_GNUC
+ #define EIGEN_DEPRECATED __attribute__((deprecated))
+ #elif EIGEN_COMP_MSVC
+ #define EIGEN_DEPRECATED __declspec(deprecated)
+ #else
+ #define EIGEN_DEPRECATED
+ #endif
+#else
+ #define EIGEN_DEPRECATED
+#endif
+#if EIGEN_COMP_GNUC
+#define EIGEN_UNUSED __attribute__((unused))
+#else
+#define EIGEN_UNUSED
+#endif
+namespace Eigen {
+ namespace internal {
+ template<typename T> EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {}
+ }
+}
+#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
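+// Illustrative usage (not part of upstream Eigen): a compiler-portable way to
+// silence unused-variable warnings, e.g.
+//   Index rows = mat.rows();            // hypothetical variable
+//   EIGEN_UNUSED_VARIABLE(rows)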
+#if !defined(EIGEN_ASM_COMMENT)
+ #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
+ #define EIGEN_ASM_COMMENT(X) __asm__("#" X)
+ #else
+ #define EIGEN_ASM_COMMENT(X)
+ #endif
+#endif
+#if (defined __CUDACC__)
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
+#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+#elif EIGEN_COMP_MSVC
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
+#elif EIGEN_COMP_SUNCC
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+#else
+ #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
+#endif
+#if defined(EIGEN_DONT_VECTORIZE)
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
+#elif defined(EIGEN_VECTORIZE_AVX512)
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
+#elif defined(__AVX__)
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
+#else
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
+#endif
+#define EIGEN_MIN_ALIGN_BYTES 16
+#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
+#endif
+#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
+ #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
+ #undef EIGEN_MAX_STATIC_ALIGN_BYTES
+ #endif
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+#endif
+#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
+ #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+ #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+ #else
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
+ #endif
+ #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
+ && !EIGEN_GCC3_OR_OLDER \
+ && !EIGEN_COMP_SUNCC \
+ && !EIGEN_OS_QNX
+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
+ #else
+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
+ #endif
+ #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+ #else
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+ #endif
+#endif
+#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
+#undef EIGEN_MAX_STATIC_ALIGN_BYTES
+#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+ #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+#endif
+#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
+#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
+#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
+#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
+#else
+#define EIGEN_ALIGN_MAX
+#endif
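+// Illustrative usage (not part of upstream Eigen): EIGEN_ALIGN_MAX requests the
+// strongest static alignment configured above, e.g.
+//   struct EIGEN_ALIGN_MAX PacketBuffer { float data[8]; };   // hypothetical type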
+#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
+#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
+#endif
+#ifdef EIGEN_DONT_ALIGN
+ #ifdef EIGEN_MAX_ALIGN_BYTES
+ #undef EIGEN_MAX_ALIGN_BYTES
+ #endif
+ #define EIGEN_MAX_ALIGN_BYTES 0
+#elif !defined(EIGEN_MAX_ALIGN_BYTES)
+ #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#endif
+#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
+#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#else
+#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+#ifndef EIGEN_UNALIGNED_VECTORIZE
+#define EIGEN_UNALIGNED_VECTORIZE 1
+#endif
+#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
+ #define EIGEN_RESTRICT
+#endif
+#ifndef EIGEN_RESTRICT
+ #define EIGEN_RESTRICT __restrict
+#endif
+#ifndef EIGEN_STACK_ALLOCATION_LIMIT
+#define EIGEN_STACK_ALLOCATION_LIMIT 131072
+#endif
+#ifndef EIGEN_DEFAULT_IO_FORMAT
+#ifdef EIGEN_MAKING_DOCS
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, " ", "\n", "", "")
+#else
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
+#endif
+#endif
+#define EIGEN_EMPTY
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__))
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =;
+#elif EIGEN_COMP_CLANG
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
+ template <typename OtherDerived> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
+#else
+ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
+ { \
+ Base::operator=(other); \
+ return *this; \
+ }
+#endif
+#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
+#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+ typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; \
+ typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; \
+ typedef typename Base::CoeffReturnType CoeffReturnType; \
+ typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
+ typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
+ typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
+ enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
+ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
+ Flags = Eigen::internal::traits<Derived>::Flags, \
+ SizeAtCompileTime = Base::SizeAtCompileTime, \
+ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
+ IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
+ using Base::derived; \
+ using Base::const_cast_derived;
+#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+ typedef typename Base::PacketScalar PacketScalar;
+#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
+#define EIGEN_SIZE_MIN_PREFER_DYNAMIC(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
+ : ((int)a == 1 || (int)b == 1) ? 1 \
+ : ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+ : ((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_SIZE_MIN_PREFER_FIXED(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
+ : ((int)a == 1 || (int)b == 1) ? 1 \
+ : ((int)a == Dynamic && (int)b == Dynamic) ? Dynamic \
+ : ((int)a == Dynamic) ? (int)b \
+ : ((int)b == Dynamic) ? (int)a \
+ : ((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+ : ((int)a >= (int)b) ? (int)a : (int)b)
+#define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b)))
+#define EIGEN_IMPLIES(a,b) (!(a) || (b))
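+// Worked examples (not part of upstream Eigen), with Dynamic == -1 as defined in
+// Constants.h below:
+//   EIGEN_SIZE_MIN_PREFER_DYNAMIC(3, Dynamic)  // == Dynamic (runtime size wins)
+//   EIGEN_SIZE_MIN_PREFER_FIXED(3, Dynamic)    // == 3       (fixed size wins)
+//   EIGEN_SIZE_MAX(3, Dynamic)                 // == Dynamic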
+#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \
+ CwiseBinaryOp< \
+ EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \
+ typename internal::traits<LHS>::Scalar, \
+ typename internal::traits<RHS>::Scalar \
+ >, \
+ const LHS, \
+ const RHS \
+ >
+#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \
+ template<typename OtherDerived> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \
+ (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+ { \
+ return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \
+ }
+#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \
+ (Eigen::internal::has_ReturnType<Eigen::ScalarBinaryOpTraits<TYPEA,TYPEB,EIGEN_CAT(EIGEN_CAT(Eigen::internal::scalar_,OPNAME),_op)<TYPEA,TYPEB> > >::value)
+#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \
+ CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<typename internal::traits<EXPR>::Scalar,SCALAR>, const EXPR, \
+ const typename internal::plain_constant_type<EXPR,SCALAR>::type>
+#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \
+ CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<SCALAR,typename internal::traits<EXPR>::Scalar>, \
+ const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>
+#if EIGEN_COMP_MSVC_STRICT<=1600
+#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type
+#else
+#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X
+#endif
+#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \
+ template <typename T> EIGEN_DEVICE_FUNC inline \
+ EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\
+ (METHOD)(const T& scalar) const { \
+ typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \
+ return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \
+ typename internal::plain_constant_type<Derived,PromotedT>::type(derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \
+ }
+#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
+ template <typename T> EIGEN_DEVICE_FUNC inline friend \
+ EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \
+ (METHOD)(const T& scalar, const StorageBaseType& matrix) { \
+ typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \
+ return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \
+ typename internal::plain_constant_type<Derived,PromotedT>::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), matrix.derived()); \
+ }
+#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \
+ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
+ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME)
+#ifdef EIGEN_EXCEPTIONS
+# define EIGEN_THROW_X(X) throw X
+# define EIGEN_THROW throw
+# define EIGEN_TRY try
+# define EIGEN_CATCH(X) catch (X)
+#else
+# ifdef __CUDA_ARCH__
+# define EIGEN_THROW_X(X) asm("trap;")
+# define EIGEN_THROW asm("trap;")
+# else
+# define EIGEN_THROW_X(X) std::abort()
+# define EIGEN_THROW std::abort()
+# endif
+# define EIGEN_TRY if (true)
+# define EIGEN_CATCH(X) else
+#endif
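+// Illustrative note (not part of upstream Eigen): with exceptions disabled,
+// EIGEN_TRY/EIGEN_CATCH degenerate to an if(true)/else pair, so code such as
+//   EIGEN_TRY { work(); } EIGEN_CATCH(...) { cleanup(); }
+// still parses, while EIGEN_THROW aborts (or traps in CUDA device code).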
+#if EIGEN_HAS_CXX11_NOEXCEPT
+# define EIGEN_INCLUDE_TYPE_TRAITS
+# define EIGEN_NOEXCEPT noexcept
+# define EIGEN_NOEXCEPT_IF(x) noexcept(x)
+# define EIGEN_NO_THROW noexcept(true)
+# define EIGEN_EXCEPTION_SPEC(X) noexcept(false)
+#else
+# define EIGEN_NOEXCEPT
+# define EIGEN_NOEXCEPT_IF(x)
+# define EIGEN_NO_THROW throw()
+# define EIGEN_EXCEPTION_SPEC(X) throw(X)
+#endif
+#endif
+// end #include "src/Core/util/Macros.h"
+#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
+ #pragma GCC optimize ("-fno-ipa-cp-clone")
+#endif
+#include <complex>
+#if EIGEN_MAX_ALIGN_BYTES==0
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+#endif
+#if EIGEN_COMP_MSVC
+ #include <malloc.h>
+ #if (EIGEN_COMP_MSVC >= 1500)
+ #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
+ #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
+ #endif
+ #endif
+#else
+ #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
+ #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
+ #endif
+#endif
+#ifndef EIGEN_DONT_VECTORIZE
+ #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_SSE
+ #define EIGEN_VECTORIZE_SSE2
+ #ifdef __SSE3__
+ #define EIGEN_VECTORIZE_SSE3
+ #endif
+ #ifdef __SSSE3__
+ #define EIGEN_VECTORIZE_SSSE3
+ #endif
+ #ifdef __SSE4_1__
+ #define EIGEN_VECTORIZE_SSE4_1
+ #endif
+ #ifdef __SSE4_2__
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+ #ifdef __AVX__
+ #define EIGEN_VECTORIZE_AVX
+ #define EIGEN_VECTORIZE_SSE3
+ #define EIGEN_VECTORIZE_SSSE3
+ #define EIGEN_VECTORIZE_SSE4_1
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+ #ifdef __AVX2__
+ #define EIGEN_VECTORIZE_AVX2
+ #endif
+ #ifdef __FMA__
+ #define EIGEN_VECTORIZE_FMA
+ #endif
+ #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
+ #define EIGEN_VECTORIZE_AVX512
+ #define EIGEN_VECTORIZE_AVX2
+ #define EIGEN_VECTORIZE_AVX
+ #define EIGEN_VECTORIZE_FMA
+ #ifdef __AVX512DQ__
+ #define EIGEN_VECTORIZE_AVX512DQ
+ #endif
+ #endif
+ extern "C" {
+ #if EIGEN_COMP_ICC >= 1110
+ #include <immintrin.h>
+ #else
+ #include <mmintrin.h>
+ #include <emmintrin.h>
+ #include <xmmintrin.h>
+ #ifdef EIGEN_VECTORIZE_SSE3
+ #include <pmmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSSE3
+ #include <tmmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSE4_1
+ #include <smmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSE4_2
+ #include <nmmintrin.h>
+ #endif
+ #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
+ #include <immintrin.h>
+ #endif
+ #endif
+ }
+ #elif defined __VSX__
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_VSX
+ #include <altivec.h>
+ #undef bool
+ #undef vector
+ #undef pixel
+ #elif defined __ALTIVEC__
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_ALTIVEC
+ #include <altivec.h>
+ #undef bool
+ #undef vector
+ #undef pixel
+ #elif (defined __ARM_NEON) || (defined __ARM_NEON__)
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_NEON
+ #include <arm_neon.h>
+ #elif (defined __s390x__ && defined __VEC__)
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_ZVECTOR
+ #include <vecintrin.h>
+ #endif
+#endif
+// EIGEN_COMP_CLANG is always defined above (possibly as 0), so test its value:
+#if defined(__F16C__) && !EIGEN_COMP_CLANG
+ #define EIGEN_HAS_FP16_C
+#endif
+#if defined __CUDACC__
+ #define EIGEN_VECTORIZE_CUDA
+ #include <vector_types.h>
+ #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+ #define EIGEN_HAS_CUDA_FP16
+ #endif
+#endif
+#if defined EIGEN_HAS_CUDA_FP16
+ #include <host_defines.h>
+ #include <cuda_fp16.h>
+#endif
+#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
+ #define EIGEN_HAS_OPENMP
+#endif
+#ifdef EIGEN_HAS_OPENMP
+#include <omp.h>
+#endif
+#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
+#define EIGEN_HAS_ERRNO
+#endif
+#ifdef EIGEN_HAS_ERRNO
+#include <cerrno>
+#endif
+#include <cstddef>
+#include <cstdlib>
+#include <cmath>
+#include <cassert>
+#include <functional>
+#include <iosfwd>
+#include <cstring>
+#include <string>
+#include <limits>
+#include <climits>
+#include <algorithm>
+#ifdef EIGEN_INCLUDE_TYPE_TRAITS
+#include <type_traits>
+#endif
+#ifdef EIGEN_DEBUG_ASSIGN
+#include <iostream>
+#endif
+#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
+ #include <intrin.h>
+#endif
+namespace Eigen {
+inline static const char *SimdInstructionSetsInUse(void) {
+#if defined(EIGEN_VECTORIZE_AVX512)
+ return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_AVX)
+ return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_2)
+ return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_1)
+ return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
+#elif defined(EIGEN_VECTORIZE_SSSE3)
+ return "SSE, SSE2, SSE3, SSSE3";
+#elif defined(EIGEN_VECTORIZE_SSE3)
+ return "SSE, SSE2, SSE3";
+#elif defined(EIGEN_VECTORIZE_SSE2)
+ return "SSE, SSE2";
+#elif defined(EIGEN_VECTORIZE_ALTIVEC)
+ return "AltiVec";
+#elif defined(EIGEN_VECTORIZE_VSX)
+ return "VSX";
+#elif defined(EIGEN_VECTORIZE_NEON)
+ return "ARM NEON";
+#elif defined(EIGEN_VECTORIZE_ZVECTOR)
+ return "S390X ZVECTOR";
+#else
+ return "None";
+#endif
+}
+}
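+// Illustrative usage (not part of upstream Eigen): handy for checking which SIMD
+// paths this translation unit was compiled with, e.g.
+//   std::cout << Eigen::SimdInstructionSetsInUse() << std::endl;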
+#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
+#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
+#endif
+namespace Eigen {
+using std::size_t;
+using std::ptrdiff_t;
+}
+// #include "src/Core/util/Constants.h"
+#ifndef EIGEN_CONSTANTS_H
+#define EIGEN_CONSTANTS_H
+namespace Eigen {
+const int Dynamic = -1;
+const int DynamicIndex = 0xffffff;
+const int Infinity = -1;
+const int HugeCost = 10000;
+const unsigned int RowMajorBit = 0x1;
+const unsigned int EvalBeforeNestingBit = 0x2;
+EIGEN_DEPRECATED
+const unsigned int EvalBeforeAssigningBit = 0x4;
+const unsigned int PacketAccessBit = 0x8;
+#ifdef EIGEN_VECTORIZE
+const unsigned int ActualPacketAccessBit = PacketAccessBit;
+#else
+const unsigned int ActualPacketAccessBit = 0x0;
+#endif
+const unsigned int LinearAccessBit = 0x10;
+const unsigned int LvalueBit = 0x20;
+const unsigned int DirectAccessBit = 0x40;
+EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80;
+const unsigned int NestByRefBit = 0x100;
+const unsigned int NoPreferredStorageOrderBit = 0x200;
+const unsigned int CompressedAccessBit = 0x400;
+const unsigned int HereditaryBits = RowMajorBit
+ | EvalBeforeNestingBit;
+enum UpLoType {
+ Lower=0x1,
+ Upper=0x2,
+ UnitDiag=0x4,
+ ZeroDiag=0x8,
+ UnitLower=UnitDiag|Lower,
+ UnitUpper=UnitDiag|Upper,
+ StrictlyLower=ZeroDiag|Lower,
+ StrictlyUpper=ZeroDiag|Upper,
+ SelfAdjoint=0x10,
+ Symmetric=0x20
+};
+enum AlignmentType {
+ Unaligned=0,
+ Aligned8=8,
+ Aligned16=16,
+ Aligned32=32,
+ Aligned64=64,
+ Aligned128=128,
+ AlignedMask=255,
+ Aligned=16,
+#if EIGEN_MAX_ALIGN_BYTES==128
+ AlignedMax = Aligned128
+#elif EIGEN_MAX_ALIGN_BYTES==64
+ AlignedMax = Aligned64
+#elif EIGEN_MAX_ALIGN_BYTES==32
+ AlignedMax = Aligned32
+#elif EIGEN_MAX_ALIGN_BYTES==16
+ AlignedMax = Aligned16
+#elif EIGEN_MAX_ALIGN_BYTES==8
+ AlignedMax = Aligned8
+#elif EIGEN_MAX_ALIGN_BYTES==0
+ AlignedMax = Unaligned
+#else
+#error Invalid value for EIGEN_MAX_ALIGN_BYTES
+#endif
+};
+enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
+enum DirectionType {
+ Vertical,
+ Horizontal,
+ BothDirections
+};
+enum TraversalType {
+ DefaultTraversal,
+ LinearTraversal,
+ InnerVectorizedTraversal,
+ LinearVectorizedTraversal,
+ SliceVectorizedTraversal,
+ InvalidTraversal,
+ AllAtOnceTraversal
+};
+enum UnrollingType {
+ NoUnrolling,
+ InnerUnrolling,
+ CompleteUnrolling
+};
+enum SpecializedType {
+ Specialized,
+ BuiltIn
+};
+enum StorageOptions {
+ ColMajor = 0,
+ RowMajor = 0x1,
+ AutoAlign = 0,
+ DontAlign = 0x2
+};
+enum SideType {
+ OnTheLeft = 1,
+ OnTheRight = 2
+};
+enum NoChange_t { NoChange };
+enum Sequential_t { Sequential };
+enum Default_t { Default };
+enum AmbiVectorMode {
+ IsDense = 0,
+ IsSparse
+};
+enum AccessorLevels {
+ ReadOnlyAccessors,
+ WriteAccessors,
+ DirectAccessors,
+ DirectWriteAccessors
+};
+enum DecompositionOptions {
+ Pivoting = 0x01,
+ NoPivoting = 0x02,
+ ComputeFullU = 0x04,
+ ComputeThinU = 0x08,
+ ComputeFullV = 0x10,
+ ComputeThinV = 0x20,
+ EigenvaluesOnly = 0x40,
+ ComputeEigenvectors = 0x80,
+ EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
+ Ax_lBx = 0x100,
+ ABx_lx = 0x200,
+ BAx_lx = 0x400,
+ GenEigMask = Ax_lBx | ABx_lx | BAx_lx
+};
+enum QRPreconditioners {
+ NoQRPreconditioner,
+ HouseholderQRPreconditioner,
+ ColPivHouseholderQRPreconditioner,
+ FullPivHouseholderQRPreconditioner
+};
+#ifdef Success
+#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
+#endif
+enum ComputationInfo {
+ Success = 0,
+ NumericalIssue = 1,
+ NoConvergence = 2,
+ InvalidInput = 3
+};
+enum TransformTraits {
+ Isometry = 0x1,
+ Affine = 0x2,
+ AffineCompact = 0x10 | Affine,
+ Projective = 0x20
+};
+namespace Architecture
+{
+ enum Type {
+ Generic = 0x0,
+ SSE = 0x1,
+ AltiVec = 0x2,
+ VSX = 0x3,
+ NEON = 0x4,
+#if defined EIGEN_VECTORIZE_SSE
+ Target = SSE
+#elif defined EIGEN_VECTORIZE_ALTIVEC
+ Target = AltiVec
+#elif defined EIGEN_VECTORIZE_VSX
+ Target = VSX
+#elif defined EIGEN_VECTORIZE_NEON
+ Target = NEON
+#else
+ Target = Generic
+#endif
+ };
+}
+enum ProductImplType
+{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
+enum Action {GetAction, SetAction};
+struct Dense {};
+struct Sparse {};
+struct SolverStorage {};
+struct PermutationStorage {};
+struct TranspositionsStorage {};
+struct MatrixXpr {};
+struct ArrayXpr {};
+struct DenseShape { static std::string debugName() { return "DenseShape"; } };
+struct SolverShape { static std::string debugName() { return "SolverShape"; } };
+struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } };
+struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } };
+struct BandShape { static std::string debugName() { return "BandShape"; } };
+struct TriangularShape { static std::string debugName() { return "TriangularShape"; } };
+struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } };
+struct PermutationShape { static std::string debugName() { return "PermutationShape"; } };
+struct TranspositionsShape { static std::string debugName() { return "TranspositionsShape"; } };
+struct SparseShape { static std::string debugName() { return "SparseShape"; } };
+namespace internal {
+struct IndexBased {};
+struct IteratorBased {};
+enum ComparisonName {
+ cmp_EQ = 0,
+ cmp_LT = 1,
+ cmp_LE = 2,
+ cmp_UNORD = 3,
+ cmp_NEQ = 4,
+ cmp_GT = 5,
+ cmp_GE = 6
+};
+}
+}
+#endif
+// end #include "src/Core/util/Constants.h"
+// #include "src/Core/util/Meta.h"
+#ifndef EIGEN_META_H
+#define EIGEN_META_H
+#if defined(__CUDA_ARCH__)
+#include <cfloat>
+#include <math_constants.h>
+#endif
+#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
+#include <cstdint>
+#endif
+namespace Eigen {
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index;
+namespace internal {
+#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
+typedef std::intptr_t IntPtr;
+typedef std::uintptr_t UIntPtr;
+#else
+typedef std::ptrdiff_t IntPtr;
+typedef std::size_t UIntPtr;
+#endif
+struct true_type { enum { value = 1 }; };
+struct false_type { enum { value = 0 }; };
+template<bool Condition, typename Then, typename Else>
+struct conditional { typedef Then type; };
+template<typename Then, typename Else>
+struct conditional <false, Then, Else> { typedef Else type; };
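+// Illustrative note (not part of upstream Eigen): a C++03 stand-in for
+// std::conditional, e.g. conditional<true,int,float>::type is int and
+// conditional<false,int,float>::type is float.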
+template<typename T, typename U> struct is_same { enum { value = 0 }; };
+template<typename T> struct is_same<T,T> { enum { value = 1 }; };
+template<typename T> struct remove_reference { typedef T type; };
+template<typename T> struct remove_reference<T&> { typedef T type; };
+template<typename T> struct remove_pointer { typedef T type; };
+template<typename T> struct remove_pointer<T*> { typedef T type; };
+template<typename T> struct remove_pointer<T*const> { typedef T type; };
+template <class T> struct remove_const { typedef T type; };
+template <class T> struct remove_const<const T> { typedef T type; };
+template <class T> struct remove_const<const T[]> { typedef T type[]; };
+template <class T, unsigned int Size> struct remove_const<const T[Size]> { typedef T type[Size]; };
+template<typename T> struct remove_all { typedef T type; };
+template<typename T> struct remove_all<const T> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T const&> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T&> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T const*> { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T*> { typedef typename remove_all<T>::type type; };
+template<typename T> struct is_arithmetic { enum { value = false }; };
+template<> struct is_arithmetic<float> { enum { value = true }; };
+template<> struct is_arithmetic<double> { enum { value = true }; };
+template<> struct is_arithmetic<long double> { enum { value = true }; };
+template<> struct is_arithmetic<bool> { enum { value = true }; };
+template<> struct is_arithmetic<char> { enum { value = true }; };
+template<> struct is_arithmetic<signed char> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned char> { enum { value = true }; };
+template<> struct is_arithmetic<signed short> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned short>{ enum { value = true }; };
+template<> struct is_arithmetic<signed int> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned int> { enum { value = true }; };
+template<> struct is_arithmetic<signed long> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned long> { enum { value = true }; };
+template<typename T> struct is_integral { enum { value = false }; };
+template<> struct is_integral<bool> { enum { value = true }; };
+template<> struct is_integral<char> { enum { value = true }; };
+template<> struct is_integral<signed char> { enum { value = true }; };
+template<> struct is_integral<unsigned char> { enum { value = true }; };
+template<> struct is_integral<signed short> { enum { value = true }; };
+template<> struct is_integral<unsigned short> { enum { value = true }; };
+template<> struct is_integral<signed int> { enum { value = true }; };
+template<> struct is_integral<unsigned int> { enum { value = true }; };
+template<> struct is_integral<signed long> { enum { value = true }; };
+template<> struct is_integral<unsigned long> { enum { value = true }; };
+template <typename T> struct add_const { typedef const T type; };
+template <typename T> struct add_const<T&> { typedef T& type; };
+template <typename T> struct is_const { enum { value = 0 }; };
+template <typename T> struct is_const<T const> { enum { value = 1 }; };
+template<typename T> struct add_const_on_value_type { typedef const T type; };
+template<typename T> struct add_const_on_value_type<T&> { typedef T const& type; };
+template<typename T> struct add_const_on_value_type<T*> { typedef T const* type; };
+template<typename T> struct add_const_on_value_type<T* const> { typedef T const* const type; };
+template<typename T> struct add_const_on_value_type<T const* const> { typedef T const* const type; };
+template<typename From, typename To>
+struct is_convertible_impl
+{
+private:
+ struct any_conversion
+ {
+ template <typename T> any_conversion(const volatile T&);
+ template <typename T> any_conversion(T&);
+ };
+ struct yes {int a[1];};
+ struct no {int a[2];};
+ static yes test(const To&, int);
+ static no test(any_conversion, ...);
+public:
+ static From ms_from;
+#ifdef __INTEL_COMPILER
+ #pragma warning push
+ #pragma warning ( disable : 2259 )
+#endif
+ enum { value = sizeof(test(ms_from, 0))==sizeof(yes) };
+#ifdef __INTEL_COMPILER
+ #pragma warning pop
+#endif
+};
+template<typename From, typename To>
+struct is_convertible
+{
+ enum { value = is_convertible_impl<typename remove_all<From>::type,
+ typename remove_all<To >::type>::value };
+};
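+// Illustrative note (not part of upstream Eigen): a C++03 emulation of
+// std::is_convertible using the sizeof-overload trick; test(const To&, int) is
+// preferred when From converts to To, the variadic catch-all otherwise, e.g.
+//   is_convertible<float,double>::value  // == 1
+//   is_convertible<void*,double>::value  // == 0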
+template<bool Condition, typename T=void> struct enable_if;
+template<typename T> struct enable_if<true,T>
+{ typedef T type; };
+#if defined(__CUDA_ARCH__)
+#if !defined(__FLT_EPSILON__)
+#define __FLT_EPSILON__ FLT_EPSILON
+#define __DBL_EPSILON__ DBL_EPSILON
+#endif
+namespace device {
+template<typename T> struct numeric_limits
+{
+ EIGEN_DEVICE_FUNC
+ static T epsilon() { return 0; }
+ static T (max)() { assert(false && "Highest not supported for this type"); }
+ static T (min)() { assert(false && "Lowest not supported for this type"); }
+ static T infinity() { assert(false && "Infinity not supported for this type"); }
+ static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); }
+};
+template<> struct numeric_limits<float>
+{
+ EIGEN_DEVICE_FUNC
+ static float epsilon() { return __FLT_EPSILON__; }
+ EIGEN_DEVICE_FUNC
+ static float (max)() { return CUDART_MAX_NORMAL_F; }
+ EIGEN_DEVICE_FUNC
+ static float (min)() { return FLT_MIN; }
+ EIGEN_DEVICE_FUNC
+ static float infinity() { return CUDART_INF_F; }
+ EIGEN_DEVICE_FUNC
+ static float quiet_NaN() { return CUDART_NAN_F; }
+};
+template<> struct numeric_limits<double>
+{
+ EIGEN_DEVICE_FUNC
+ static double epsilon() { return __DBL_EPSILON__; }
+ EIGEN_DEVICE_FUNC
+ static double (max)() { return DBL_MAX; }
+ EIGEN_DEVICE_FUNC
+ static double (min)() { return DBL_MIN; }
+ EIGEN_DEVICE_FUNC
+ static double infinity() { return CUDART_INF; }
+ EIGEN_DEVICE_FUNC
+ static double quiet_NaN() { return CUDART_NAN; }
+};
+template<> struct numeric_limits<int>
+{
+ EIGEN_DEVICE_FUNC
+ static int epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static int (max)() { return INT_MAX; }
+ EIGEN_DEVICE_FUNC
+ static int (min)() { return INT_MIN; }
+};
+template<> struct numeric_limits<unsigned int>
+{
+ EIGEN_DEVICE_FUNC
+ static unsigned int epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static unsigned int (max)() { return UINT_MAX; }
+ EIGEN_DEVICE_FUNC
+ static unsigned int (min)() { return 0; }
+};
+template<> struct numeric_limits<long>
+{
+ EIGEN_DEVICE_FUNC
+ static long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static long (max)() { return LONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static long (min)() { return LONG_MIN; }
+};
+template<> struct numeric_limits<unsigned long>
+{
+ EIGEN_DEVICE_FUNC
+ static unsigned long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static unsigned long (max)() { return ULONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static unsigned long (min)() { return 0; }
+};
+template<> struct numeric_limits<long long>
+{
+ EIGEN_DEVICE_FUNC
+ static long long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static long long (max)() { return LLONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static long long (min)() { return LLONG_MIN; }
+};
+template<> struct numeric_limits<unsigned long long>
+{
+ EIGEN_DEVICE_FUNC
+ static unsigned long long epsilon() { return 0; }
+ EIGEN_DEVICE_FUNC
+ static unsigned long long (max)() { return ULLONG_MAX; }
+ EIGEN_DEVICE_FUNC
+ static unsigned long long (min)() { return 0; }
+};
+}
+#endif
+class noncopyable
+{
+ EIGEN_DEVICE_FUNC noncopyable(const noncopyable&);
+ EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&);
+protected:
+ EIGEN_DEVICE_FUNC noncopyable() {}
+ EIGEN_DEVICE_FUNC ~noncopyable() {}
+};
+#if EIGEN_HAS_STD_RESULT_OF
+template<typename T> struct result_of {
+ typedef typename std::result_of<T>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+#else
+template<typename T> struct result_of { };
+struct has_none {int a[1];};
+struct has_std_result_type {int a[2];};
+struct has_tr1_result {int a[3];};
+template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
+struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef typename Func::template result<Func(ArgType)>::type type;};
+template<typename Func, typename ArgType>
+struct result_of<Func(ArgType)> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
+ static has_none testFunctor(...);
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename unary_result_of_select<Func, ArgType, FunctorType>::type type;
+};
+template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>
+struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
+{typedef typename Func::result_type type;};
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>
+{typedef typename Func::template result<Func(ArgType0,ArgType1)>::type type;};
+template<typename Func, typename ArgType0, typename ArgType1>
+struct result_of<Func(ArgType0,ArgType1)> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
+ static has_none testFunctor(...);
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
+};
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2, int SizeOf=sizeof(has_none)>
+struct ternary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_std_result_type)>
+{typedef typename Func::result_type type;};
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_tr1_result)>
+{typedef typename Func::template result<Func(ArgType0,ArgType1,ArgType2)>::type type;};
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct result_of<Func(ArgType0,ArgType1,ArgType2)> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1,ArgType2)>::type const * = 0);
+ static has_none testFunctor(...);
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;
+};
+#endif
+struct meta_yes { char a[1]; };
+struct meta_no { char a[2]; };
+template <typename T>
+struct has_ReturnType
+{
+ template <typename C> static meta_yes testFunctor(typename C::ReturnType const *);
+ template <typename C> static meta_no testFunctor(...);
+ enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
+};
+template<typename T> const T* return_ptr();
+template <typename T, typename IndexType=Index>
+struct has_nullary_operator
+{
+ template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);
+ static meta_no testFunctor(...);
+ enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+template <typename T, typename IndexType=Index>
+struct has_unary_operator
+{
+ template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);
+ static meta_no testFunctor(...);
+ enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+template <typename T, typename IndexType=Index>
+struct has_binary_operator
+{
+ template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
+ static meta_no testFunctor(...);
+ enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+template<int Y,
+ int InfX = 0,
+ int SupX = ((Y==1) ? 1 : Y/2),
+ bool Done = ((SupX-InfX)<=1 ? true : ((SupX*SupX <= Y) && ((SupX+1)*(SupX+1) > Y))) >
+class meta_sqrt
+{
+ enum {
+ MidX = (InfX+SupX)/2,
+ TakeInf = MidX*MidX > Y ? 1 : 0,
+ NewInf = int(TakeInf) ? InfX : int(MidX),
+ NewSup = int(TakeInf) ? int(MidX) : SupX
+ };
+ public:
+ enum { ret = meta_sqrt<Y,NewInf,NewSup>::ret };
+};
+template<int Y, int InfX, int SupX>
+class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
+template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
+struct meta_least_common_multiple
+{
+ enum { ret = meta_least_common_multiple<A,B,K+1>::ret };
+};
+template<int A, int B, int K>
+struct meta_least_common_multiple<A,B,K,true>
+{
+ enum { ret = A*K };
+};
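+// Worked example (not part of upstream Eigen): the recursion tries successive
+// multiples of A until B divides one, e.g. meta_least_common_multiple<6,4>::ret == 12.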
+template<typename T, typename U> struct scalar_product_traits
+{
+ enum { Defined = 0 };
+};
+}
+namespace numext {
+#if defined(__CUDA_ARCH__)
+template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
+#else
+template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
+#endif
+#if defined(__CUDA_ARCH__)
+using internal::device::numeric_limits;
+#else
+using std::numeric_limits;
+#endif
+template<typename T>
+T div_ceil(const T &a, const T &b)
+{
+ return (a+b-1) / b;
+}
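+// Worked example (not part of upstream Eigen): div_ceil rounds integer division
+// up, e.g. div_ceil(7, 3) == 3; it assumes a + b - 1 does not overflow T.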
+}
+}
+#endif
+// end #include "src/Core/util/Meta.h"
+// #include "src/Core/util/ForwardDeclarations.h"
+#ifndef EIGEN_FORWARDDECLARATIONS_H
+#define EIGEN_FORWARDDECLARATIONS_H
+namespace Eigen {
+namespace internal {
+template<typename T> struct traits;
+template<typename T> struct traits<const T> : traits<T> {};
+template<typename Derived> struct has_direct_access
+{
+ enum { ret = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0 };
+};
+template<typename Derived> struct accessors_level
+{
+ enum { has_direct_access = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0,
+ has_write_access = (traits<Derived>::Flags & LvalueBit) ? 1 : 0,
+ value = has_direct_access ? (has_write_access ? DirectWriteAccessors : DirectAccessors)
+ : (has_write_access ? WriteAccessors : ReadOnlyAccessors)
+ };
+};
+template<typename T> struct evaluator_traits;
+template< typename T> struct evaluator;
+}
+template<typename T> struct NumTraits;
+template<typename Derived> struct EigenBase;
+template<typename Derived> class DenseBase;
+template<typename Derived> class PlainObjectBase;
+template<typename Derived,
+ int Level = internal::accessors_level<Derived>::value >
+class DenseCoeffsBase;
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+#if EIGEN_GNUC_AT(3,4)
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
+ : Eigen::ColMajor ),
+#else
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+#endif
+ int _MaxRows = _Rows,
+ int _MaxCols = _Cols
+> class Matrix;
+template<typename Derived> class MatrixBase;
+template<typename Derived> class ArrayBase;
+template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
+template<typename ExpressionType, template <typename> class StorageBase > class NoAlias;
+template<typename ExpressionType> class NestByValue;
+template<typename ExpressionType> class ForceAlignedAccess;
+template<typename ExpressionType> class SwapWrapper;
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
+template<typename MatrixType, int Size=Dynamic> class VectorBlock;
+template<typename MatrixType> class Transpose;
+template<typename MatrixType> class Conjugate;
+template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
+template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
+template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
+template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3> class CwiseTernaryOp;
+template<typename Decomposition, typename Rhstype> class Solve;
+template<typename XprType> class Inverse;
+template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
+template<typename Derived> class DiagonalBase;
+template<typename _DiagonalVectorType> class DiagonalWrapper;
+template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
+template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
+template<typename MatrixType, int Index = 0> class Diagonal;
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class PermutationMatrix;
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class Transpositions;
+template<typename Derived> class PermutationBase;
+template<typename Derived> class TranspositionsBase;
+template<typename _IndicesType> class PermutationWrapper;
+template<typename _IndicesType> class TranspositionsWrapper;
+template<typename Derived,
+ int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
+> class MapBase;
+template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
+template<int Value = Dynamic> class InnerStride;
+template<int Value = Dynamic> class OuterStride;
+template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
+template<typename Derived> class RefBase;
+template<typename PlainObjectType, int Options = 0,
+ typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
+template<typename Derived> class TriangularBase;
+template<typename MatrixType, unsigned int Mode> class TriangularView;
+template<typename MatrixType, unsigned int Mode> class SelfAdjointView;
+template<typename MatrixType> class SparseView;
+template<typename ExpressionType> class WithFormat;
+template<typename MatrixType> struct CommaInitializer;
+template<typename Derived> class ReturnByValue;
+template<typename ExpressionType> class ArrayWrapper;
+template<typename Derived> class SolverBase;
+template<typename XprType> class InnerIterator;
+namespace internal {
+template<typename DecompositionType> struct kernel_retval_base;
+template<typename DecompositionType> struct kernel_retval;
+template<typename DecompositionType> struct image_retval_base;
+template<typename DecompositionType> struct image_retval;
+}
+namespace internal {
+template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynamic, int Subs=Dynamic, int Options=0> class BandMatrix;
+}
+namespace internal {
+template<typename Lhs, typename Rhs> struct product_type;
+template<bool> struct EnableIf;
+template< typename T,
+ int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
+ typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
+ typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
+ typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+ typename RhsScalar = typename traits<typename T::Rhs>::Scalar
+ > struct product_evaluator;
+}
+template<typename Lhs, typename Rhs,
+ int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct ProductReturnType;
+template<typename Lhs, typename Rhs> struct LazyProductReturnType;
+namespace internal {
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op;
+template<typename Scalar> struct scalar_opposite_op;
+template<typename Scalar> struct scalar_conjugate_op;
+template<typename Scalar> struct scalar_real_op;
+template<typename Scalar> struct scalar_imag_op;
+template<typename Scalar> struct scalar_abs_op;
+template<typename Scalar> struct scalar_abs2_op;
+template<typename Scalar> struct scalar_sqrt_op;
+template<typename Scalar> struct scalar_rsqrt_op;
+template<typename Scalar> struct scalar_exp_op;
+template<typename Scalar> struct scalar_log_op;
+template<typename Scalar> struct scalar_cos_op;
+template<typename Scalar> struct scalar_sin_op;
+template<typename Scalar> struct scalar_acos_op;
+template<typename Scalar> struct scalar_asin_op;
+template<typename Scalar> struct scalar_tan_op;
+template<typename Scalar> struct scalar_inverse_op;
+template<typename Scalar> struct scalar_square_op;
+template<typename Scalar> struct scalar_cube_op;
+template<typename Scalar, typename NewType> struct scalar_cast_op;
+template<typename Scalar> struct scalar_random_op;
+template<typename Scalar> struct scalar_constant_op;
+template<typename Scalar> struct scalar_identity_op;
+template<typename Scalar,bool iscpx> struct scalar_sign_op;
+template<typename Scalar,typename ScalarExponent> struct scalar_pow_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
+template<typename Scalar> struct scalar_lgamma_op;
+template<typename Scalar> struct scalar_digamma_op;
+template<typename Scalar> struct scalar_erf_op;
+template<typename Scalar> struct scalar_erfc_op;
+template<typename Scalar> struct scalar_igamma_op;
+template<typename Scalar> struct scalar_igammac_op;
+template<typename Scalar> struct scalar_zeta_op;
+template<typename Scalar> struct scalar_betainc_op;
+}
+struct IOFormat;
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+#if EIGEN_GNUC_AT(3,4)
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
+ : Eigen::ColMajor ),
+#else
+ ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+ : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+#endif
+ int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;
+template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;
+template<typename ExpressionType, int Direction> class VectorwiseOp;
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate;
+template<typename MatrixType, int Direction = BothDirections> class Reverse;
+template<typename MatrixType> class FullPivLU;
+template<typename MatrixType> class PartialPivLU;
+namespace internal {
+template<typename MatrixType> struct inverse_impl;
+}
+template<typename MatrixType> class HouseholderQR;
+template<typename MatrixType> class ColPivHouseholderQR;
+template<typename MatrixType> class FullPivHouseholderQR;
+template<typename MatrixType> class CompleteOrthogonalDecomposition;
+template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
+template<typename MatrixType> class BDCSVD;
+template<typename MatrixType, int UpLo = Lower> class LLT;
+template<typename MatrixType, int UpLo = Lower> class LDLT;
+template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
+template<typename Scalar> class JacobiRotation;
+template<typename Derived, int _Dim> class RotationBase;
+template<typename Lhs, typename Rhs> class Cross;
+template<typename Derived> class QuaternionBase;
+template<typename Scalar> class Rotation2D;
+template<typename Scalar> class AngleAxis;
+template<typename Scalar,int Dim> class Translation;
+template<typename Scalar,int Dim> class AlignedBox;
+template<typename Scalar, int Options = AutoAlign> class Quaternion;
+template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
+template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
+template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane;
+template<typename Scalar> class UniformScaling;
+template<typename MatrixType,int Direction> class Homogeneous;
+template<typename Derived> class SparseMatrixBase;
+template<typename Derived> struct MatrixExponentialReturnValue;
+template<typename Derived> class MatrixFunctionReturnValue;
+template<typename Derived> class MatrixSquareRootReturnValue;
+template<typename Derived> class MatrixLogarithmReturnValue;
+template<typename Derived> class MatrixPowerReturnValue;
+template<typename Derived> class MatrixComplexPowerReturnValue;
+namespace internal {
+template <typename Scalar>
+struct stem_function
+{
+ typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+ typedef ComplexScalar type(ComplexScalar, int);
+};
+}
+}
+#endif
+// end #include "src/Core/util/ForwardDeclarations.h"
+// #include "src/Core/util/StaticAssert.h"
+#ifndef EIGEN_STATIC_ASSERT_H
+#define EIGEN_STATIC_ASSERT_H
+#ifndef EIGEN_NO_STATIC_ASSERT
+ #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
+ #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
+ #else
+ namespace Eigen {
+ namespace internal {
+ template<bool condition>
+ struct static_assertion {};
+ template<>
+ struct static_assertion<true>
+ {
+ enum {
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
+ YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
+ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
+ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
+ THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
+ THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
+ OUT_OF_RANGE_ACCESS,
+ YOU_MADE_A_PROGRAMMING_MISTAKE,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
+ EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
+ YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
+ YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
+ UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
+ THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
+ NUMERIC_TYPE_MUST_BE_REAL,
+ COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
+ WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
+ THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
+ INVALID_MATRIX_PRODUCT,
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
+ THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
+ THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
+ INVALID_MATRIX_TEMPLATE_PARAMETERS,
+ INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
+ BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
+ THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
+ YOU_ALREADY_SPECIFIED_THIS_STRIDE,
+ INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
+ THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
+ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
+ YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
+ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
+ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
+ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
+ THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
+ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
+ THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
+ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
+ OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
+ IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
+ STORAGE_LAYOUT_DOES_NOT_MATCH,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
+ MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
+ THIS_TYPE_IS_NOT_SUPPORTED,
+ STORAGE_KIND_MUST_MATCH,
+ STORAGE_INDEX_MUST_MATCH,
+ CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
+ };
+ };
+ }
+ }
+ #if EIGEN_COMP_MSVC
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+ {Eigen::internal::static_assertion<bool(CONDITION)>::MSG;}
+ #else
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+ if (Eigen::internal::static_assertion<static_cast<bool>(CONDITION)>::MSG) {}
+ #endif
+ #endif
+#else
+ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
+#endif
+#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX)
+#define EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime!=Eigen::Dynamic, \
+ YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR)
+#define EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(TYPE) \
+ EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime==Eigen::Dynamic, \
+ YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR)
+#define EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) \
+ EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime && TYPE::SizeAtCompileTime==SIZE, \
+ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE)
+#define EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) \
+ EIGEN_STATIC_ASSERT(TYPE::RowsAtCompileTime==ROWS && TYPE::ColsAtCompileTime==COLS, \
+ THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)
+#define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \
+ EIGEN_STATIC_ASSERT( \
+ (int(TYPE0::SizeAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::SizeAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::SizeAtCompileTime)==int(TYPE1::SizeAtCompileTime)),\
+ YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES)
+#define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+ ( \
+ (int(Eigen::internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(Eigen::internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \
+ || (\
+ (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::RowsAtCompileTime)==int(TYPE1::RowsAtCompileTime)) \
+ && (int(TYPE0::ColsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE1::ColsAtCompileTime)==Eigen::Dynamic \
+ || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime))\
+ ) \
+ )
+#define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \
+ EIGEN_STATIC_ASSERT(!NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
+#define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+ EIGEN_STATIC_ASSERT( \
+ EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1),\
+ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)
+#define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \
+ EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Dynamic) && \
+ (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Dynamic), \
+ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS)
+#define EIGEN_STATIC_ASSERT_LVALUE(Derived) \
+ EIGEN_STATIC_ASSERT(Eigen::internal::is_lvalue<Derived>::value, \
+ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY)
+#define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \
+ EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived>::XprKind, ArrayXpr>::value), \
+ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES)
+#define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \
+ EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived1>::XprKind, \
+ typename Eigen::internal::traits<Derived2>::XprKind \
+ >::value), \
+ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
+#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
+ EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);
+#endif
+// end #include "src/Core/util/StaticAssert.h"
+// #include "src/Core/util/XprHelper.h"
+#ifndef EIGEN_XPRHELPER_H
+#define EIGEN_XPRHELPER_H
+#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
+ #define EIGEN_EMPTY_STRUCT_CTOR(X) \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
+#else
+ #define EIGEN_EMPTY_STRUCT_CTOR(X)
+#endif
+namespace Eigen {
+namespace internal {
+template<typename IndexDest, typename IndexSrc>
+EIGEN_DEVICE_FUNC
+inline IndexDest convert_index(const IndexSrc& idx) {
+  eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value too big for target type");
+ return IndexDest(idx);
+}
+template<typename ExprScalar,typename T, bool IsSupported>
+struct promote_scalar_arg;
+template<typename S,typename T>
+struct promote_scalar_arg<S,T,true>
+{
+ typedef T type;
+};
+template<typename ExprScalar,typename T,typename PromotedType,
+ bool ConvertibleToLiteral = internal::is_convertible<T,PromotedType>::value,
+ bool IsSafe = NumTraits<T>::IsInteger || !NumTraits<PromotedType>::IsInteger>
+struct promote_scalar_arg_unsupported;
+template<typename S,typename T>
+struct promote_scalar_arg<S,T,false> : promote_scalar_arg_unsupported<S,T,typename NumTraits<S>::Literal> {};
+template<typename S,typename T, typename PromotedType>
+struct promote_scalar_arg_unsupported<S,T,PromotedType,true,true>
+{
+ typedef PromotedType type;
+};
+template<typename ExprScalar,typename T, typename PromotedType>
+struct promote_scalar_arg_unsupported<ExprScalar,T,PromotedType,false,true>
+ : promote_scalar_arg_unsupported<ExprScalar,T,ExprScalar>
+{};
+template<typename S,typename T, typename PromotedType, bool ConvertibleToLiteral>
+struct promote_scalar_arg_unsupported<S,T,PromotedType,ConvertibleToLiteral,false> {};
+template<typename S,typename T>
+struct promote_scalar_arg_unsupported<S,T,S,false,true> {};
+class no_assignment_operator
+{
+ private:
+ no_assignment_operator& operator=(const no_assignment_operator&);
+};
+template<typename I1, typename I2>
+struct promote_index_type
+{
+ typedef typename conditional<(sizeof(I1)<sizeof(I2)), I2, I1>::type type;
+};
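+// variable_if_dynamic stores a runtime value only when Value==Dynamic; for
+// fixed sizes it is an empty class and value() is a compile-time constant.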
+template<typename T, int Value> class variable_if_dynamic
+{
+ public:
+ EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
+};
+template<typename T> class variable_if_dynamic<T, Dynamic>
+{
+ T m_value;
+ EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
+ public:
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
+};
+template<typename T, int Value> class variable_if_dynamicindex
+{
+ public:
+ EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
+};
+template<typename T> class variable_if_dynamicindex<T, DynamicIndex>
+{
+ T m_value;
+ EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); }
+ public:
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T value) : m_value(value) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
+};
+template<typename T> struct functor_traits
+{
+ enum
+ {
+ Cost = 10,
+ PacketAccess = false,
+ IsRepeatable = false
+ };
+};
+template<typename T> struct packet_traits;
+template<typename T> struct unpacket_traits
+{
+ typedef T type;
+ typedef T half;
+ enum
+ {
+ size = 1,
+ alignment = 1
+ };
+};
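+// find_best_packet_helper recursively halves the packet type until its size
+// evenly divides Size (or Size is Dynamic, or no smaller packet exists).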
+template<int Size, typename PacketType,
+ bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
+struct find_best_packet_helper;
+template< int Size, typename PacketType>
+struct find_best_packet_helper<Size,PacketType,true>
+{
+ typedef PacketType type;
+};
+template<int Size, typename PacketType>
+struct find_best_packet_helper<Size,PacketType,false>
+{
+ typedef typename find_best_packet_helper<Size,typename unpacket_traits<PacketType>::half>::type type;
+};
+template<typename T, int Size>
+struct find_best_packet
+{
+ typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type;
+};
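+// compute_default_alignment_helper picks the largest alignment that evenly
+// divides the array's byte size, halving down to EIGEN_MIN_ALIGN_BYTES.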
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+template<int ArrayBytes, int AlignmentBytes,
+ bool Match = bool((ArrayBytes%AlignmentBytes)==0),
+ bool TryHalf = bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >
+struct compute_default_alignment_helper
+{
+ enum { value = 0 };
+};
+template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
+struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, true, TryHalf>
+{
+ enum { value = AlignmentBytes };
+};
+template<int ArrayBytes, int AlignmentBytes>
+struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, false, true>
+{
+ enum { value = compute_default_alignment_helper<ArrayBytes, AlignmentBytes/2>::value };
+};
+#else
+template<int ArrayBytes, int AlignmentBytes>
+struct compute_default_alignment_helper
+{
+ enum { value = 0 };
+};
+#endif
+template<typename T, int Size> struct compute_default_alignment {
+ enum { value = compute_default_alignment_helper<Size*sizeof(T),EIGEN_MAX_STATIC_ALIGN_BYTES>::value };
+};
+template<typename T> struct compute_default_alignment<T,Dynamic> {
+ enum { value = EIGEN_MAX_ALIGN_BYTES };
+};
+template<typename _Scalar, int _Rows, int _Cols,
+ int _Options = AutoAlign |
+ ( (_Rows==1 && _Cols!=1) ? RowMajor
+ : (_Cols==1 && _Rows!=1) ? ColMajor
+ : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+ int _MaxRows = _Rows,
+ int _MaxCols = _Cols
+> class make_proper_matrix_type
+{
+ enum {
+ IsColVector = _Cols==1 && _Rows!=1,
+ IsRowVector = _Rows==1 && _Cols!=1,
+ Options = IsColVector ? (_Options | ColMajor) & ~RowMajor
+ : IsRowVector ? (_Options | RowMajor) & ~ColMajor
+ : _Options
+ };
+ public:
+ typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;
+};
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_flags
+{
+ enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
+ public:
+ enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
+};
+template<int _Rows, int _Cols> struct size_at_compile_time
+{
+ enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
+};
+template<typename XprType> struct size_of_xpr_at_compile_time
+{
+ enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret };
+};
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_matrix_type;
+template<typename T, typename BaseClassType, int Flags> struct plain_matrix_type_dense;
+template<typename T> struct plain_matrix_type<T,Dense>
+{
+ typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, traits<T>::Flags>::type type;
+};
+template<typename T> struct plain_matrix_type<T,DiagonalShape>
+{
+ typedef typename T::PlainObject type;
+};
+template<typename T, int Flags> struct plain_matrix_type_dense<T,MatrixXpr,Flags>
+{
+ typedef Matrix<typename traits<T>::Scalar,
+ traits<T>::RowsAtCompileTime,
+ traits<T>::ColsAtCompileTime,
+ AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
+ traits<T>::MaxRowsAtCompileTime,
+ traits<T>::MaxColsAtCompileTime
+ > type;
+};
+template<typename T, int Flags> struct plain_matrix_type_dense<T,ArrayXpr,Flags>
+{
+ typedef Array<typename traits<T>::Scalar,
+ traits<T>::RowsAtCompileTime,
+ traits<T>::ColsAtCompileTime,
+ AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
+ traits<T>::MaxRowsAtCompileTime,
+ traits<T>::MaxColsAtCompileTime
+ > type;
+};
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct eval;
+template<typename T> struct eval<T,Dense>
+{
+ typedef typename plain_matrix_type<T>::type type;
+};
+template<typename T> struct eval<T,DiagonalShape>
+{
+ typedef typename plain_matrix_type<T>::type type;
+};
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+ typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+ typedef const Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_object_eval;
+template<typename T>
+struct plain_object_eval<T,Dense>
+{
+ typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, evaluator<T>::Flags>::type type;
+};
+template<typename T> struct plain_matrix_type_column_major
+{
+ enum { Rows = traits<T>::RowsAtCompileTime,
+ Cols = traits<T>::ColsAtCompileTime,
+ MaxRows = traits<T>::MaxRowsAtCompileTime,
+ MaxCols = traits<T>::MaxColsAtCompileTime
+ };
+ typedef Matrix<typename traits<T>::Scalar,
+ Rows,
+ Cols,
+ (MaxRows==1&&MaxCols!=1) ? RowMajor : ColMajor,
+ MaxRows,
+ MaxCols
+ > type;
+};
+template<typename T> struct plain_matrix_type_row_major
+{
+ enum { Rows = traits<T>::RowsAtCompileTime,
+ Cols = traits<T>::ColsAtCompileTime,
+ MaxRows = traits<T>::MaxRowsAtCompileTime,
+ MaxCols = traits<T>::MaxColsAtCompileTime
+ };
+ typedef Matrix<typename traits<T>::Scalar,
+ Rows,
+ Cols,
+ (MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor,
+ MaxRows,
+ MaxCols
+ > type;
+};
+template <typename T>
+struct ref_selector
+{
+ typedef typename conditional<
+ bool(traits<T>::Flags & NestByRefBit),
+ T const&,
+ const T
+ >::type type;
+ typedef typename conditional<
+ bool(traits<T>::Flags & NestByRefBit),
+ T &,
+ T
+ >::type non_const_type;
+};
+template<typename T1, typename T2>
+struct transfer_constness
+{
+ typedef typename conditional<
+ bool(internal::is_const<T1>::value),
+ typename internal::add_const_on_value_type<T2>::type,
+ T2
+ >::type type;
+};
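+// nested_eval decides whether a sub-expression nested n times should be
+// evaluated into a temporary: it weighs one evaluation plus n cheap scalar
+// reads against n repeated coefficient evaluations; EvalBeforeNestingBit
+// forces the temporary.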
+template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
+{
+ enum {
+ ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
+ CoeffReadCost = evaluator<T>::CoeffReadCost,
+ NAsInteger = n == Dynamic ? HugeCost : n,
+ CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
+ CostNoEval = NAsInteger * CoeffReadCost,
+ Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
+ };
+ typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
+};
+template<typename T>
+EIGEN_DEVICE_FUNC
+inline T* const_cast_ptr(const T* ptr)
+{
+ return const_cast<T*>(ptr);
+}
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind>
+struct dense_xpr_base
+{
+};
+template<typename Derived>
+struct dense_xpr_base<Derived, MatrixXpr>
+{
+ typedef MatrixBase<Derived> type;
+};
+template<typename Derived>
+struct dense_xpr_base<Derived, ArrayXpr>
+{
+ typedef ArrayBase<Derived> type;
+};
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind>
+struct generic_xpr_base;
+template<typename Derived, typename XprKind>
+struct generic_xpr_base<Derived, XprKind, Dense>
+{
+ typedef typename dense_xpr_base<Derived,XprKind>::type type;
+};
+template<typename XprType, typename CastType> struct cast_return_type
+{
+ typedef typename XprType::Scalar CurrentScalarType;
+ typedef typename remove_all<CastType>::type _CastType;
+ typedef typename _CastType::Scalar NewScalarType;
+ typedef typename conditional<is_same<CurrentScalarType,NewScalarType>::value,
+ const XprType&,CastType>::type type;
+};
+template <typename A, typename B> struct promote_storage_type;
+template <typename A> struct promote_storage_type<A,A>
+{
+ typedef A ret;
+};
+template <typename A> struct promote_storage_type<A, const A>
+{
+ typedef A ret;
+};
+template <typename A> struct promote_storage_type<const A, A>
+{
+ typedef A ret;
+};
+template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; };
+template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; };
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; };
+template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; };
+template <typename Functor> struct cwise_promote_storage_type<Sparse,Dense,Functor> { typedef Sparse ret; };
+template <typename Functor> struct cwise_promote_storage_type<Dense,Sparse,Functor> { typedef Sparse ret; };
+template <typename LhsKind, typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order {
+ enum { value = LhsOrder };
+};
+template <typename LhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<LhsKind,Sparse,LhsOrder,RhsOrder> { enum { value = RhsOrder }; };
+template <typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<Sparse,RhsKind,LhsOrder,RhsOrder> { enum { value = LhsOrder }; };
+template <int Order> struct cwise_promote_storage_order<Sparse,Sparse,Order,Order> { enum { value = Order }; };
+template <typename A, typename B, int ProductTag> struct product_promote_storage_type;
+template <typename A, int ProductTag> struct product_promote_storage_type<A, A, ProductTag> { typedef A ret;};
+template <int ProductTag> struct product_promote_storage_type<Dense, Dense, ProductTag> { typedef Dense ret;};
+template <typename A, int ProductTag> struct product_promote_storage_type<A, Dense, ProductTag> { typedef Dense ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<Dense, B, ProductTag> { typedef Dense ret; };
+template <typename A, int ProductTag> struct product_promote_storage_type<A, DiagonalShape, ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape, B, ProductTag> { typedef B ret; };
+template <int ProductTag> struct product_promote_storage_type<Dense, DiagonalShape, ProductTag> { typedef Dense ret; };
+template <int ProductTag> struct product_promote_storage_type<DiagonalShape, Dense, ProductTag> { typedef Dense ret; };
+template <typename A, int ProductTag> struct product_promote_storage_type<A, PermutationStorage, ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B, ProductTag> { typedef B ret; };
+template <int ProductTag> struct product_promote_storage_type<Dense, PermutationStorage, ProductTag> { typedef Dense ret; };
+template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; };
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_row_type
+{
+ typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime,
+ ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
+ typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime,
+ ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixRowType,
+ ArrayRowType
+ >::type type;
+};
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_col_type
+{
+ typedef Matrix<Scalar, ExpressionType::RowsAtCompileTime, 1,
+ ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> MatrixColType;
+ typedef Array<Scalar, ExpressionType::RowsAtCompileTime, 1,
+ ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> ArrayColType;
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixColType,
+ ArrayColType
+ >::type type;
+};
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_diag_type
+{
+ enum { diag_size = EIGEN_SIZE_MIN_PREFER_DYNAMIC(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime),
+ max_diag_size = EIGEN_SIZE_MIN_PREFER_FIXED(ExpressionType::MaxRowsAtCompileTime, ExpressionType::MaxColsAtCompileTime)
+ };
+ typedef Matrix<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> MatrixDiagType;
+ typedef Array<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> ArrayDiagType;
+ typedef typename conditional<
+ is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+ MatrixDiagType,
+ ArrayDiagType
+ >::type type;
+};
+template<typename Expr,typename Scalar = typename Expr::Scalar>
+struct plain_constant_type
+{
+ enum { Options = (traits<Expr>::Flags&RowMajorBit)?RowMajor:0 };
+ typedef Array<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime,
+ Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> array_type;
+ typedef Matrix<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime,
+ Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> matrix_type;
+ typedef CwiseNullaryOp<scalar_constant_op<Scalar>, const typename conditional<is_same< typename traits<Expr>::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type;
+};
+template<typename ExpressionType>
+struct is_lvalue
+{
+ enum { value = (!bool(is_const<ExpressionType>::value)) &&
+ bool(traits<ExpressionType>::Flags & LvalueBit) };
+};
+template<typename T> struct is_diagonal
+{ enum { ret = false }; };
+template<typename T> struct is_diagonal<DiagonalBase<T> >
+{ enum { ret = true }; };
+template<typename T> struct is_diagonal<DiagonalWrapper<T> >
+{ enum { ret = true }; };
+template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
+{ enum { ret = true }; };
+template<typename S1, typename S2> struct glue_shapes;
+template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; };
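+// is_same_dense detects aliasing: two expressions refer to the same dense
+// storage iff both have direct access and share data pointer and strides.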
+template<typename T1, typename T2>
+bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<has_direct_access<T1>::ret&&has_direct_access<T2>::ret, T1>::type * = 0)
+{
+ return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
+}
+template<typename T1, typename T2>
+bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_access<T1>::ret&&has_direct_access<T2>::ret), T1>::type * = 0)
+{
+ return false;
+}
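+// scalar_div_cost is a rough cost model for scalar division, with
+// specializations for std::complex and for 64-bit signed/unsigned long.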
+template<typename T,bool Vectorized=false,typename EnableIf = void>
+struct scalar_div_cost {
+ enum { value = 8*NumTraits<T>::MulCost };
+};
+template<typename T,bool Vectorized>
+struct scalar_div_cost<std::complex<T>, Vectorized> {
+ enum { value = 2*scalar_div_cost<T>::value
+ + 6*NumTraits<T>::MulCost
+ + 3*NumTraits<T>::AddCost
+ };
+};
+template<bool Vectorized>
+struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
+template<bool Vectorized>
+struct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };
+#ifdef EIGEN_DEBUG_ASSIGN
+std::string demangle_traversal(int t)
+{
+ if(t==DefaultTraversal) return "DefaultTraversal";
+ if(t==LinearTraversal) return "LinearTraversal";
+ if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal";
+ if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal";
+ if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal";
+ return "?";
+}
+std::string demangle_unrolling(int t)
+{
+ if(t==NoUnrolling) return "NoUnrolling";
+ if(t==InnerUnrolling) return "InnerUnrolling";
+ if(t==CompleteUnrolling) return "CompleteUnrolling";
+ return "?";
+}
+std::string demangle_flags(int f)
+{
+ std::string res;
+ if(f&RowMajorBit) res += " | RowMajor";
+ if(f&PacketAccessBit) res += " | Packet";
+ if(f&LinearAccessBit) res += " | Linear";
+ if(f&LvalueBit) res += " | Lvalue";
+ if(f&DirectAccessBit) res += " | Direct";
+ if(f&NestByRefBit) res += " | NestByRef";
+ if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit";
+ return res;
+}
+#endif
+}
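+// ScalarBinaryOpTraits determines the result scalar type of a mixed-scalar
+// binary operation; only combinations providing a ReturnType are allowed,
+// as enforced by EIGEN_CHECK_BINARY_COMPATIBILIY below.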
+template<typename ScalarA, typename ScalarB, typename BinaryOp=internal::scalar_product_op<ScalarA,ScalarB> >
+struct ScalarBinaryOpTraits
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ : internal::scalar_product_traits<ScalarA,ScalarB>
+#endif
+{};
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T,T,BinaryOp>
+{
+ typedef T ReturnType;
+};
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T, typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, BinaryOp>
+{
+ typedef T ReturnType;
+};
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, T, BinaryOp>
+{
+ typedef T ReturnType;
+};
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T,void,BinaryOp>
+{
+ typedef T ReturnType;
+};
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<void,T,BinaryOp>
+{
+ typedef T ReturnType;
+};
+template<typename BinaryOp>
+struct ScalarBinaryOpTraits<void,void,BinaryOp>
+{
+ typedef void ReturnType;
+};
+#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
+ EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType<ScalarBinaryOpTraits<LHS, RHS,BINOP> >::value), \
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+}
+#endif
+// end #include "src/Core/util/XprHelper.h"
+// #include "src/Core/util/Memory.h"
+#ifndef EIGEN_MEMORY_H
+#define EIGEN_MEMORY_H
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
+#else
+ #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
+#endif
+#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
+#else
+ #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
+#endif
+#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
+ || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
+ || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
+ || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
+ #define EIGEN_MALLOC_ALREADY_ALIGNED 1
+#else
+ #define EIGEN_MALLOC_ALREADY_ALIGNED 0
+#endif
+#endif
+namespace Eigen {
+namespace internal {
+EIGEN_DEVICE_FUNC
+inline void throw_std_bad_alloc()
+{
+ #ifdef EIGEN_EXCEPTIONS
+ throw std::bad_alloc();
+ #else
+ std::size_t huge = static_cast<std::size_t>(-1);
+ new int[huge];
+ #endif
+}
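+// handmade_aligned_malloc over-allocates by EIGEN_DEFAULT_ALIGN_BYTES, rounds
+// the pointer up, and stores the original pointer one slot before the aligned
+// address so that handmade_aligned_free can recover it.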
+inline void* handmade_aligned_malloc(std::size_t size)
+{
+ void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
+ if (original == 0) return 0;
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
+ *(reinterpret_cast<void**>(aligned) - 1) = original;
+ return aligned;
+}
+inline void handmade_aligned_free(void *ptr)
+{
+ if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+}
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
+{
+ if (ptr == 0) return handmade_aligned_malloc(size);
+ void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
+ original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
+ if (original == 0) return 0;
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+ *(reinterpret_cast<void**>(aligned) - 1) = original;
+ return aligned;
+}
+#ifdef EIGEN_NO_MALLOC
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+ eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
+}
+#elif defined EIGEN_RUNTIME_NO_MALLOC
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
+{
+ static bool value = true;
+  if (update)
+ value = new_value;
+ return value;
+}
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
+EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+ eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
+}
+#else
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{}
+#endif
+EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
+{
+ check_that_malloc_is_allowed();
+ void *result;
+ #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+ result = std::malloc(size);
+ #if EIGEN_DEFAULT_ALIGN_BYTES==16
+    eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fall back to the handmade aligned memory allocator.");
+ #endif
+ #else
+ result = handmade_aligned_malloc(size);
+ #endif
+ if(!result && size)
+ throw_std_bad_alloc();
+ return result;
+}
+EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
+{
+ #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+ std::free(ptr);
+ #else
+ handmade_aligned_free(ptr);
+ #endif
+}
+inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
+{
+ EIGEN_UNUSED_VARIABLE(old_size);
+ void *result;
+#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+ result = std::realloc(ptr,new_size);
+#else
+ result = handmade_aligned_realloc(ptr,new_size,old_size);
+#endif
+ if (!result && new_size)
+ throw_std_bad_alloc();
+ return result;
+}
+template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
+{
+ return aligned_malloc(size);
+}
+template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
+{
+ check_that_malloc_is_allowed();
+ void *result = std::malloc(size);
+ if(!result && size)
+ throw_std_bad_alloc();
+ return result;
+}
+template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
+{
+ aligned_free(ptr);
+}
+template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
+{
+ std::free(ptr);
+}
+template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
+{
+ return aligned_realloc(ptr, new_size, old_size);
+}
+template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
+{
+ return std::realloc(ptr, new_size);
+}
+template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
+{
+ if(ptr)
+ while(size) ptr[--size].~T();
+}
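+// construct_elements_of_array placement-news each element in turn; if a
+// constructor throws, the already-constructed elements are destroyed before
+// the exception is rethrown.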
+template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
+{
+ std::size_t i;
+ EIGEN_TRY
+ {
+ for (i = 0; i < size; ++i) ::new (ptr + i) T;
+ return ptr;
+ }
+ EIGEN_CATCH(...)
+ {
+ destruct_elements_of_array(ptr, i);
+ EIGEN_THROW;
+ }
+ return NULL;
+}
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
+{
+ if(size > std::size_t(-1) / sizeof(T))
+ throw_std_bad_alloc();
+}
+template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
+{
+ check_size_for_overflow<T>(size);
+ T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
+ EIGEN_TRY
+ {
+ return construct_elements_of_array(result, size);
+ }
+ EIGEN_CATCH(...)
+ {
+ aligned_free(result);
+ EIGEN_THROW;
+ }
+ return result;
+}
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
+{
+ check_size_for_overflow<T>(size);
+ T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+ EIGEN_TRY
+ {
+ return construct_elements_of_array(result, size);
+ }
+ EIGEN_CATCH(...)
+ {
+ conditional_aligned_free<Align>(result);
+ EIGEN_THROW;
+ }
+ return result;
+}
+template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
+{
+ destruct_elements_of_array<T>(ptr, size);
+ aligned_free(ptr);
+}
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
+{
+ destruct_elements_of_array<T>(ptr, size);
+ conditional_aligned_free<Align>(ptr);
+}
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
+{
+ check_size_for_overflow<T>(new_size);
+ check_size_for_overflow<T>(old_size);
+ if(new_size < old_size)
+ destruct_elements_of_array(pts+new_size, old_size-new_size);
+ T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+ if(new_size > old_size)
+ {
+ EIGEN_TRY
+ {
+ construct_elements_of_array(result+old_size, new_size-old_size);
+ }
+ EIGEN_CATCH(...)
+ {
+ conditional_aligned_free<Align>(result);
+ EIGEN_THROW;
+ }
+ }
+ return result;
+}
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
+{
+ if(size==0)
+ return 0;
+ check_size_for_overflow<T>(size);
+ T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+ if(NumTraits<T>::RequireInitialization)
+ {
+ EIGEN_TRY
+ {
+ construct_elements_of_array(result, size);
+ }
+ EIGEN_CATCH(...)
+ {
+ conditional_aligned_free<Align>(result);
+ EIGEN_THROW;
+ }
+ }
+ return result;
+}
+template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
+{
+ check_size_for_overflow<T>(new_size);
+ check_size_for_overflow<T>(old_size);
+ if(NumTraits<T>::RequireInitialization && (new_size < old_size))
+ destruct_elements_of_array(pts+new_size, old_size-new_size);
+ T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+ if(NumTraits<T>::RequireInitialization && (new_size > old_size))
+ {
+ EIGEN_TRY
+ {
+ construct_elements_of_array(result+old_size, new_size-old_size);
+ }
+ EIGEN_CATCH(...)
+ {
+ conditional_aligned_free<Align>(result);
+ EIGEN_THROW;
+ }
+ }
+ return result;
+}
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
+{
+ if(NumTraits<T>::RequireInitialization)
+ destruct_elements_of_array<T>(ptr, size);
+ conditional_aligned_free<Align>(ptr);
+}
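+// first_aligned returns the index of the first array element aligned to
+// Alignment bytes, or size when that alignment can never be reached.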
+template<int Alignment, typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
+{
+ const Index ScalarSize = sizeof(Scalar);
+ const Index AlignmentSize = Alignment / ScalarSize;
+ const Index AlignmentMask = AlignmentSize-1;
+ if(AlignmentSize<=1)
+ {
+ return 0;
+ }
+ else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
+ {
+ return size;
+ }
+ else
+ {
+ Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
+ return (first < size) ? first : size;
+ }
+}
+template<typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
+{
+ typedef typename packet_traits<Scalar>::type DefaultPacketType;
+ return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
+}
+template<typename Index>
+inline Index first_multiple(Index size, Index base)
+{
+ return ((size+base-1)/base)*base;
+}
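+// smart_copy uses memcpy for types that do not require initialization and
+// falls back to std::copy otherwise (smart_memmove below uses the same idea).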
+template<typename T, bool UseMemcpy> struct smart_copy_helper;
+template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
+{
+ smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+template<typename T> struct smart_copy_helper<T,true> {
+ EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
+ {
+ IntPtr size = IntPtr(end)-IntPtr(start);
+ if(size==0) return;
+ eigen_internal_assert(start!=0 && end!=0 && target!=0);
+ memcpy(target, start, size);
+ }
+};
+template<typename T> struct smart_copy_helper<T,false> {
+ EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
+ { std::copy(start, end, target); }
+};
+template<typename T, bool UseMemmove> struct smart_memmove_helper;
+template<typename T> void smart_memmove(const T* start, const T* end, T* target)
+{
+ smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+template<typename T> struct smart_memmove_helper<T,true> {
+ static inline void run(const T* start, const T* end, T* target)
+ {
+ IntPtr size = IntPtr(end)-IntPtr(start);
+ if(size==0) return;
+ eigen_internal_assert(start!=0 && end!=0 && target!=0);
+ std::memmove(target, start, size);
+ }
+};
+template<typename T> struct smart_memmove_helper<T,false> {
+ static inline void run(const T* start, const T* end, T* target)
+ {
+ if (UIntPtr(target) < UIntPtr(start))
+ {
+ std::copy(start, end, target);
+ }
+ else
+ {
+ std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
+ std::copy_backward(start, end, target + count);
+ }
+ }
+};
+#ifndef EIGEN_ALLOCA
+ #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
+ #define EIGEN_ALLOCA alloca
+ #elif EIGEN_COMP_MSVC
+ #define EIGEN_ALLOCA _alloca
+ #endif
+#endif
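+// aligned_stack_memory_handler is the RAII guard behind
+// ei_declare_aligned_stack_constructed_variable: it constructs/destroys the
+// elements as needed and frees the buffer when it was heap-allocated.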
+template<typename T> class aligned_stack_memory_handler : noncopyable
+{
+ public:
+ aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
+ : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
+ {
+ if(NumTraits<T>::RequireInitialization && m_ptr)
+ Eigen::internal::construct_elements_of_array(m_ptr, size);
+ }
+ ~aligned_stack_memory_handler()
+ {
+ if(NumTraits<T>::RequireInitialization && m_ptr)
+ Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
+ if(m_deallocate)
+ Eigen::internal::aligned_free(m_ptr);
+ }
+ protected:
+ T* m_ptr;
+ std::size_t m_size;
+ bool m_deallocate;
+};
+template<typename T> class scoped_array : noncopyable
+{
+ T* m_ptr;
+public:
+ explicit scoped_array(std::ptrdiff_t size)
+ {
+ m_ptr = new T[size];
+ }
+ ~scoped_array()
+ {
+ delete[] m_ptr;
+ }
+ T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
+ const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
+ T* &ptr() { return m_ptr; }
+ const T* ptr() const { return m_ptr; }
+ operator const T*() const { return m_ptr; }
+};
+template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
+{
+ std::swap(a.ptr(),b.ptr());
+}
+}
+#ifdef EIGEN_ALLOCA
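+// EIGEN_ALIGNED_ALLOCA over-allocates on the stack and rounds the pointer up
+// to the default alignment, since alloca itself gives no alignment guarantee.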
+ #if EIGEN_DEFAULT_ALIGN_BYTES>0
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
+ #else
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
+ #endif
+ #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+ TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
+ : reinterpret_cast<TYPE*>( \
+ (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
+ : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
+#else
+ #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
+ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
+#endif
+#if EIGEN_MAX_ALIGN_BYTES!=0
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
+ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
+ EIGEN_CATCH (...) { return 0; } \
+ }
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
+ void *operator new(std::size_t size) { \
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+ } \
+ void *operator new[](std::size_t size) { \
+ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+ } \
+ void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void operator delete(void * ptr, std::size_t ) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void operator delete[](void * ptr, std::size_t ) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ \
+ \
+ \
+ static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
+ static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
+ void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
+ void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
+ \
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
+ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
+ } \
+ typedef void eigen_aligned_operator_new_marker_type;
+#else
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+#endif
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
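+// aligned_allocator is an STL-compatible allocator that routes allocations
+// through Eigen's aligned_malloc/aligned_free.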
+template<class T>
+class aligned_allocator : public std::allocator<T>
+{
+public:
+ typedef std::size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+ template<class U>
+ struct rebind
+ {
+ typedef aligned_allocator<U> other;
+ };
+ aligned_allocator() : std::allocator<T>() {}
+ aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
+ template<class U>
+ aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
+ ~aligned_allocator() {}
+ pointer allocate(size_type num, const void* = 0)
+ {
+ internal::check_size_for_overflow<T>(num);
+ return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
+ }
+ void deallocate(pointer p, size_type )
+ {
+ internal::aligned_free(p);
+ }
+};
+#if !defined(EIGEN_NO_CPUID)
+# if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
+# if defined(__PIC__) && EIGEN_ARCH_i386
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && EIGEN_ARCH_x86_64
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
+# else
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
+# endif
+# elif EIGEN_COMP_MSVC
+# if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
+# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
+# endif
+# endif
+#endif
+namespace internal {
+#ifdef EIGEN_CPUID
+inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
+{
+ return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
+}
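+// queryCacheSizes_intel_direct decodes CPUID leaf 0x4: for each data/unified
+// cache, size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1) bytes.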
+inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ l1 = l2 = l3 = 0;
+ int cache_id = 0;
+ int cache_type = 0;
+ do {
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x4,cache_id);
+ cache_type = (abcd[0] & 0x0F) >> 0;
+ if(cache_type==1||cache_type==3)
+ {
+ int cache_level = (abcd[0] & 0xE0) >> 5;
+ int ways = (abcd[1] & 0xFFC00000) >> 22;
+ int partitions = (abcd[1] & 0x003FF000) >> 12;
+ int line_size = (abcd[1] & 0x00000FFF) >> 0;
+ int sets = (abcd[2]);
+ int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
+ switch(cache_level)
+ {
+ case 1: l1 = cache_size; break;
+ case 2: l2 = cache_size; break;
+ case 3: l3 = cache_size; break;
+ default: break;
+ }
+ }
+ cache_id++;
+ } while(cache_type>0 && cache_id<16);
+}
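+// CPUID leaf 0x4 encodes each cache as (ways-1, partitions-1, line_size-1, sets-1),
+// hence the four +1 terms above. Worked example: raw fields 7/0/63/63 decode to
+// (7+1) * (0+1) * (63+1) * (63+1) = 8 * 1 * 64 * 64 = 32768 bytes, i.e. a typical
+// 32KB L1 data cache. The loop stops after the last reported cache or 16 ids.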
+inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ l1 = l2 = l3 = 0;
+ EIGEN_CPUID(abcd,0x00000002,0);
+ unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
+ bool check_for_p2_core2 = false;
+ for(int i=0; i<14; ++i)
+ {
+ switch(bytes[i])
+ {
+ case 0x0A: l1 = 8; break;
+ case 0x0C: l1 = 16; break;
+ case 0x0E: l1 = 24; break;
+ case 0x10: l1 = 16; break;
+ case 0x15: l1 = 16; break;
+ case 0x2C: l1 = 32; break;
+ case 0x30: l1 = 32; break;
+ case 0x60: l1 = 16; break;
+ case 0x66: l1 = 8; break;
+ case 0x67: l1 = 16; break;
+ case 0x68: l1 = 32; break;
+ case 0x1A: l2 = 96; break;
+ case 0x22: l3 = 512; break;
+ case 0x23: l3 = 1024; break;
+ case 0x25: l3 = 2048; break;
+ case 0x29: l3 = 4096; break;
+ case 0x39: l2 = 128; break;
+ case 0x3A: l2 = 192; break;
+ case 0x3B: l2 = 128; break;
+ case 0x3C: l2 = 256; break;
+ case 0x3D: l2 = 384; break;
+ case 0x3E: l2 = 512; break;
+ case 0x40: l2 = 0; break;
+ case 0x41: l2 = 128; break;
+ case 0x42: l2 = 256; break;
+ case 0x43: l2 = 512; break;
+ case 0x44: l2 = 1024; break;
+ case 0x45: l2 = 2048; break;
+ case 0x46: l3 = 4096; break;
+ case 0x47: l3 = 8192; break;
+ case 0x48: l2 = 3072; break;
+ case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;
+ case 0x4A: l3 = 6144; break;
+ case 0x4B: l3 = 8192; break;
+ case 0x4C: l3 = 12288; break;
+ case 0x4D: l3 = 16384; break;
+ case 0x4E: l2 = 6144; break;
+ case 0x78: l2 = 1024; break;
+ case 0x79: l2 = 128; break;
+ case 0x7A: l2 = 256; break;
+ case 0x7B: l2 = 512; break;
+ case 0x7C: l2 = 1024; break;
+ case 0x7D: l2 = 2048; break;
+ case 0x7E: l2 = 256; break;
+ case 0x7F: l2 = 512; break;
+ case 0x80: l2 = 512; break;
+ case 0x81: l2 = 128; break;
+ case 0x82: l2 = 256; break;
+ case 0x83: l2 = 512; break;
+ case 0x84: l2 = 1024; break;
+ case 0x85: l2 = 2048; break;
+ case 0x86: l2 = 512; break;
+ case 0x87: l2 = 1024; break;
+ case 0x88: l3 = 2048; break;
+ case 0x89: l3 = 4096; break;
+ case 0x8A: l3 = 8192; break;
+ case 0x8D: l3 = 3072; break;
+ default: break;
+ }
+ }
+ if(check_for_p2_core2 && l2 == l3)
+ l3 = 0;
+ l1 *= 1024;
+ l2 *= 1024;
+ l3 *= 1024;
+}
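+// Fallback for CPUs predating leaf 0x4: CPUID leaf 0x2 packs one-byte cache
+// descriptors into EAX..EDX, the switch above maps each descriptor to a size in
+// KB, and the trailing *= 1024 converts to bytes. Descriptor 0x49 is ambiguous
+// (L2 on some parts, L3 on others), which the check_for_p2_core2 flag resolves.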
+inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
+{
+ if(max_std_funcs>=4)
+ queryCacheSizes_intel_direct(l1,l2,l3);
+ else
+ queryCacheSizes_intel_codes(l1,l2,l3);
+}
+inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x80000005,0);
+ l1 = (abcd[2] >> 24) * 1024;
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x80000006,0);
+ l2 = (abcd[2] >> 16) * 1024;
+ l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024;
+}
+#endif
+inline void queryCacheSizes(int& l1, int& l2, int& l3)
+{
+ #ifdef EIGEN_CPUID
+ int abcd[4];
+ const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
+ const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
+ const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574};
+ EIGEN_CPUID(abcd,0x0,0);
+ int max_std_funcs = abcd[1];
+ if(cpuid_is_vendor(abcd,GenuineIntel))
+ queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+ else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
+ queryCacheSizes_amd(l1,l2,l3);
+ else
+ queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+ #else
+ l1 = l2 = l3 = -1;
+ #endif
+}
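+// Usage sketch (illustrative): sizes come back in bytes, or -1 when EIGEN_CPUID
+// is not available for this compiler/architecture combination:
+//   int l1, l2, l3;
+//   Eigen::internal::queryCacheSizes(l1, l2, l3);
+//   // e.g. l1 == 32768 on a CPU with a 32KB L1 data cache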
+inline int queryL1CacheSize()
+{
+ int l1(-1), l2, l3;
+ queryCacheSizes(l1,l2,l3);
+ return l1;
+}
+inline int queryTopLevelCacheSize()
+{
+ int l1, l2(-1), l3(-1);
+ queryCacheSizes(l1,l2,l3);
+ return (std::max)(l2,l3);
+}
+}
+}
+#endif
+// end #include "src/Core/util/Memory.h"
+// #include "src/Core/NumTraits.h"
+#ifndef EIGEN_NUMTRAITS_H
+#define EIGEN_NUMTRAITS_H
+namespace Eigen {
+namespace internal {
+template< typename T,
+ bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
+ bool is_integer = NumTraits<T>::IsInteger>
+struct default_digits10_impl
+{
+ static int run() { return std::numeric_limits<T>::digits10; }
+};
+template<typename T>
+struct default_digits10_impl<T,false,false>
+{
+ static int run() {
+ using std::log10;
+ using std::ceil;
+ typedef typename NumTraits<T>::Real Real;
+ return int(ceil(-log10(NumTraits<Real>::epsilon())));
+ }
+};
+template<typename T>
+struct default_digits10_impl<T,false,true>
+{
+ static int run() { return 0; }
+};
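+// The middle specialization estimates digits10 for floating-point-like types with
+// no std::numeric_limits specialization as ceil(-log10(epsilon)); e.g. a
+// double-like type with epsilon ~= 2.22e-16 yields ceil(15.65) = 16. Integer-like
+// types without numeric_limits fall through to 0.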
+}
+template<typename T> struct GenericNumTraits
+{
+ enum {
+ IsInteger = std::numeric_limits<T>::is_integer,
+ IsSigned = std::numeric_limits<T>::is_signed,
+ IsComplex = 0,
+ RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,
+ ReadCost = 1,
+ AddCost = 1,
+ MulCost = 1
+ };
+ typedef T Real;
+ typedef typename internal::conditional<
+ IsInteger,
+ typename internal::conditional<sizeof(T)<=2, float, double>::type,
+ T
+ >::type NonInteger;
+ typedef T Nested;
+ typedef T Literal;
+ EIGEN_DEVICE_FUNC
+ static inline Real epsilon()
+ {
+ return numext::numeric_limits<T>::epsilon();
+ }
+ EIGEN_DEVICE_FUNC
+ static inline int digits10()
+ {
+ return internal::default_digits10_impl<T>::run();
+ }
+ EIGEN_DEVICE_FUNC
+ static inline Real dummy_precision()
+ {
+ return Real(0);
+ }
+ EIGEN_DEVICE_FUNC
+ static inline T highest() {
+ return (numext::numeric_limits<T>::max)();
+ }
+ EIGEN_DEVICE_FUNC
+ static inline T lowest() {
+ return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
+ }
+ EIGEN_DEVICE_FUNC
+ static inline T infinity() {
+ return numext::numeric_limits<T>::infinity();
+ }
+ EIGEN_DEVICE_FUNC
+ static inline T quiet_NaN() {
+ return numext::numeric_limits<T>::quiet_NaN();
+ }
+};
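+// GenericNumTraits derives everything it can from std::numeric_limits; the
+// concrete scalars below override only what differs, e.g. dummy_precision, the
+// default tolerance consumed by the isApprox-style fuzzy comparisons later in
+// this file.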
+template<typename T> struct NumTraits : GenericNumTraits<T>
+{};
+template<> struct NumTraits<float>
+ : GenericNumTraits<float>
+{
+ EIGEN_DEVICE_FUNC
+ static inline float dummy_precision() { return 1e-5f; }
+};
+template<> struct NumTraits<double> : GenericNumTraits<double>
+{
+ EIGEN_DEVICE_FUNC
+ static inline double dummy_precision() { return 1e-12; }
+};
+template<> struct NumTraits<long double>
+ : GenericNumTraits<long double>
+{
+ static inline long double dummy_precision() { return 1e-15l; }
+};
+template<typename _Real> struct NumTraits<std::complex<_Real> >
+ : GenericNumTraits<std::complex<_Real> >
+{
+ typedef _Real Real;
+ typedef typename NumTraits<_Real>::Literal Literal;
+ enum {
+ IsComplex = 1,
+ RequireInitialization = NumTraits<_Real>::RequireInitialization,
+ ReadCost = 2 * NumTraits<_Real>::ReadCost,
+ AddCost = 2 * NumTraits<Real>::AddCost,
+ MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
+ };
+ EIGEN_DEVICE_FUNC
+ static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
+ EIGEN_DEVICE_FUNC
+ static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
+ EIGEN_DEVICE_FUNC
+ static inline int digits10() { return NumTraits<Real>::digits10(); }
+};
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+{
+ typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> ArrayType;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef Array<RealScalar, Rows, Cols, Options, MaxRows, MaxCols> Real;
+ typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
+ typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
+ typedef ArrayType & Nested;
+ typedef typename NumTraits<Scalar>::Literal Literal;
+ enum {
+ IsComplex = NumTraits<Scalar>::IsComplex,
+ IsInteger = NumTraits<Scalar>::IsInteger,
+ IsSigned = NumTraits<Scalar>::IsSigned,
+ RequireInitialization = 1,
+ ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
+ AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
+ MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
+ };
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
+ static inline int digits10() { return NumTraits<Scalar>::digits10(); }
+};
+template<> struct NumTraits<std::string>
+ : GenericNumTraits<std::string>
+{
+ enum {
+ RequireInitialization = 1,
+ ReadCost = HugeCost,
+ AddCost = HugeCost,
+ MulCost = HugeCost
+ };
+ static inline int digits10() { return 0; }
+private:
+ static inline std::string epsilon();
+ static inline std::string dummy_precision();
+ static inline std::string lowest();
+ static inline std::string highest();
+ static inline std::string infinity();
+ static inline std::string quiet_NaN();
+};
+template<> struct NumTraits<void> {};
+}
+#endif
+// end #include "src/Core/NumTraits.h"
+// #include "src/Core/MathFunctions.h"
+#ifndef EIGEN_MATHFUNCTIONS_H
+#define EIGEN_MATHFUNCTIONS_H
+#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
+namespace Eigen {
+#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500
+long abs(long x) { return (labs(x)); }
+double abs(double x) { return (fabs(x)); }
+float abs(float x) { return (fabsf(x)); }
+long double abs(long double x) { return (fabsl(x)); }
+#endif
+namespace internal {
+template<typename T, typename dummy = void>
+struct global_math_functions_filtering_base
+{
+ typedef T type;
+};
+template<typename T> struct always_void { typedef void type; };
+template<typename T>
+struct global_math_functions_filtering_base
+ <T,
+ typename always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type
+ >
+{
+ typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type;
+};
+#define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>
+#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>::type
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct real_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return x;
+ }
+};
+template<typename Scalar>
+struct real_default_impl<Scalar,true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ using std::real;
+ return real(x);
+ }
+};
+template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
+#ifdef __CUDA_ARCH__
+template<typename T>
+struct real_impl<std::complex<T> >
+{
+ typedef T RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline T run(const std::complex<T>& x)
+ {
+ return x.real();
+ }
+};
+#endif
+template<typename Scalar>
+struct real_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct imag_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar&)
+ {
+ return RealScalar(0);
+ }
+};
+template<typename Scalar>
+struct imag_default_impl<Scalar,true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ using std::imag;
+ return imag(x);
+ }
+};
+template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
+#ifdef __CUDA_ARCH__
+template<typename T>
+struct imag_impl<std::complex<T> >
+{
+ typedef T RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline T run(const std::complex<T>& x)
+ {
+ return x.imag();
+ }
+};
+#endif
+template<typename Scalar>
+struct imag_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+template<typename Scalar>
+struct real_ref_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar& run(Scalar& x)
+ {
+ return reinterpret_cast<RealScalar*>(&x)[0];
+ }
+ EIGEN_DEVICE_FUNC
+ static inline const RealScalar& run(const Scalar& x)
+ {
+ return reinterpret_cast<const RealScalar*>(&x)[0];
+ }
+};
+template<typename Scalar>
+struct real_ref_retval
+{
+ typedef typename NumTraits<Scalar>::Real & type;
+};
+template<typename Scalar, bool IsComplex>
+struct imag_ref_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar& run(Scalar& x)
+ {
+ return reinterpret_cast<RealScalar*>(&x)[1];
+ }
+ EIGEN_DEVICE_FUNC
+ static inline const RealScalar& run(const Scalar& x)
+ {
+ return reinterpret_cast<const RealScalar*>(&x)[1];
+ }
+};
+template<typename Scalar>
+struct imag_ref_default_impl<Scalar, false>
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(Scalar&)
+ {
+ return Scalar(0);
+ }
+ EIGEN_DEVICE_FUNC
+ static inline const Scalar run(const Scalar&)
+ {
+ return Scalar(0);
+ }
+};
+template<typename Scalar>
+struct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+template<typename Scalar>
+struct imag_ref_retval
+{
+ typedef typename NumTraits<Scalar>::Real & type;
+};
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct conj_impl
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
+ {
+ return x;
+ }
+};
+template<typename Scalar>
+struct conj_impl<Scalar,true>
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
+ {
+ using std::conj;
+ return conj(x);
+ }
+};
+template<typename Scalar>
+struct conj_retval
+{
+ typedef Scalar type;
+};
+template<typename Scalar,bool IsComplex>
+struct abs2_impl_default
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return x*x;
+ }
+};
+template<typename Scalar>
+struct abs2_impl_default<Scalar, true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return real(x)*real(x) + imag(x)*imag(x);
+ }
+};
+template<typename Scalar>
+struct abs2_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return abs2_impl_default<Scalar,NumTraits<Scalar>::IsComplex>::run(x);
+ }
+};
+template<typename Scalar>
+struct abs2_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+template<typename Scalar, bool IsComplex>
+struct norm1_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD_MATH(abs);
+ return abs(real(x)) + abs(imag(x));
+ }
+};
+template<typename Scalar>
+struct norm1_default_impl<Scalar, false>
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD_MATH(abs);
+ return abs(x);
+ }
+};
+template<typename Scalar>
+struct norm1_impl : norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+template<typename Scalar>
+struct norm1_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+template<typename Scalar>
+struct hypot_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ static inline RealScalar run(const Scalar& x, const Scalar& y)
+ {
+ EIGEN_USING_STD_MATH(abs);
+ EIGEN_USING_STD_MATH(sqrt);
+ RealScalar _x = abs(x);
+ RealScalar _y = abs(y);
+ RealScalar p, qp;
+ if(_x>_y)
+ {
+ p = _x;
+ qp = _y / p;
+ }
+ else
+ {
+ p = _y;
+ qp = _x / p;
+ }
+ if(p==RealScalar(0)) return RealScalar(0);
+ return p * sqrt(RealScalar(1) + qp*qp);
+ }
+};
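+// The scaling above computes sqrt(x^2 + y^2) without intermediate overflow or
+// underflow: with p = max(|x|,|y|) and qp = min/max <= 1, the result is
+// p * sqrt(1 + qp^2). Worked example: x=3, y=4 gives p=4, qp=0.75, and
+// 4 * sqrt(1 + 0.5625) = 4 * 1.25 = 5.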
+template<typename Scalar>
+struct hypot_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+template<typename OldType, typename NewType>
+struct cast_impl
+{
+ EIGEN_DEVICE_FUNC
+ static inline NewType run(const OldType& x)
+ {
+ return static_cast<NewType>(x);
+ }
+};
+template<typename OldType, typename NewType>
+EIGEN_DEVICE_FUNC
+inline NewType cast(const OldType& x)
+{
+ return cast_impl<OldType, NewType>::run(x);
+}
+#if EIGEN_HAS_CXX11_MATH
+ template<typename Scalar>
+ struct round_impl {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+ using std::round;
+ return round(x);
+ }
+ };
+#else
+ template<typename Scalar>
+ struct round_impl
+ {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+ EIGEN_USING_STD_MATH(floor);
+ EIGEN_USING_STD_MATH(ceil);
+ return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5));
+ }
+ };
+#endif
+template<typename Scalar>
+struct round_retval
+{
+ typedef Scalar type;
+};
+#if EIGEN_HAS_CXX11_MATH
+ template<typename Scalar>
+ struct arg_impl {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD_MATH(arg);
+ return arg(x);
+ }
+ };
+#else
+ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+ struct arg_default_impl
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0);
+ }
+ };
+ template<typename Scalar>
+ struct arg_default_impl<Scalar,true>
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD_MATH(arg);
+ return arg(x);
+ }
+ };
+ template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
+#endif
+template<typename Scalar>
+struct arg_retval
+{
+ typedef typename NumTraits<Scalar>::Real type;
+};
+namespace std_fallback {
+ template<typename Scalar>
+ EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_USING_STD_MATH(log);
+ Scalar x1p = RealScalar(1) + x;
+ return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+ }
+}
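+// The fallback log1p uses a classic compensation trick: with x1p = 1+x as rounded,
+// log(x1p) / (x1p - 1) approximates log(1+x)/x with the rounding error of the
+// addition cancelled out, so multiplying by the exact x recovers an accurate
+// log(1+x) even for tiny x where 1+x rounds to 1.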
+template<typename Scalar>
+struct log1p_impl {
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ #if EIGEN_HAS_CXX11_MATH
+ using std::log1p;
+ #endif
+ using std_fallback::log1p;
+ return log1p(x);
+ }
+};
+template<typename Scalar>
+struct log1p_retval
+{
+ typedef Scalar type;
+};
+template<typename ScalarX,typename ScalarY, bool IsInteger = NumTraits<ScalarX>::IsInteger&&NumTraits<ScalarY>::IsInteger>
+struct pow_impl
+{
+ typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
+ static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
+ {
+ EIGEN_USING_STD_MATH(pow);
+ return pow(x, y);
+ }
+};
+template<typename ScalarX,typename ScalarY>
+struct pow_impl<ScalarX,ScalarY, true>
+{
+ typedef ScalarX result_type;
+ static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y)
+ {
+ ScalarX res(1);
+ eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);
+ if(y & 1) res *= x;
+ y >>= 1;
+ while(y)
+ {
+ x *= x;
+ if(y&1) res *= x;
+ y >>= 1;
+ }
+ return res;
+ }
+};
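+// Integer pow is exponentiation by squaring: O(log y) multiplies instead of y-1.
+// Worked example for x^13 (13 = 0b1101): res picks up x (bit 0), then x^4 (bit 2),
+// then x^8 (bit 3), so res = x * x^4 * x^8 = x^13 after three squarings of x.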
+template<typename Scalar,
+ bool IsComplex,
+ bool IsInteger>
+struct random_default_impl {};
+template<typename Scalar>
+struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+template<typename Scalar>
+struct random_retval
+{
+ typedef Scalar type;
+};
+template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
+template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
+template<typename Scalar>
+struct random_default_impl<Scalar, false, false>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX);
+ }
+ static inline Scalar run()
+ {
+ return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1));
+ }
+};
+enum {
+ meta_floor_log2_terminate,
+ meta_floor_log2_move_up,
+ meta_floor_log2_move_down,
+ meta_floor_log2_bogus
+};
+template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
+{
+ enum { middle = (lower + upper) / 2,
+ value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
+ : (n < (1 << middle)) ? int(meta_floor_log2_move_down)
+ : (n==0) ? int(meta_floor_log2_bogus)
+ : int(meta_floor_log2_move_up)
+ };
+};
+template<unsigned int n,
+ int lower = 0,
+ int upper = sizeof(unsigned int) * CHAR_BIT - 1,
+ int selector = meta_floor_log2_selector<n, lower, upper>::value>
+struct meta_floor_log2 {};
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
+{
+ enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
+};
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
+{
+ enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
+};
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
+{
+ enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
+};
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
+{
+};
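+// meta_floor_log2<n> is a compile-time binary search over bit positions: the
+// selector halves [lower,upper] until it brackets floor(log2(n)), so e.g.
+// meta_floor_log2<32768>::value == 15. It is used below to count the usable
+// random bits in RAND_MAX+1.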
+template<typename Scalar>
+struct random_default_impl<Scalar, false, true>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
+ if(y<x)
+ return x;
+ std::size_t range = ScalarX(y)-ScalarX(x);
+ std::size_t offset = 0;
+ std::size_t divisor = 1;
+ std::size_t multiplier = 1;
+ if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
+ else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
+ do {
+ offset = (std::size_t(std::rand()) * multiplier) / divisor;
+ } while (offset > range);
+ return Scalar(ScalarX(x) + offset);
+ }
+ static inline Scalar run()
+ {
+#ifdef EIGEN_MAKING_DOCS
+ return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
+#else
+ enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
+ scalar_bits = sizeof(Scalar) * CHAR_BIT,
+ shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
+ offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
+ };
+ return Scalar((std::rand() >> shift) - offset);
+#endif
+ }
+};
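+// Bounded integer random maps std::rand() onto [x,y] and rejects overshoots
+// instead of taking a modulo, keeping the distribution uniform: divisor compresses
+// RAND_MAX onto small ranges, multiplier stretches it over ranges wider than
+// RAND_MAX.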
+template<typename Scalar>
+struct random_default_impl<Scalar, true, false>
+{
+ static inline Scalar run(const Scalar& x, const Scalar& y)
+ {
+ return Scalar(random(real(x), real(y)),
+ random(imag(x), imag(y)));
+ }
+ static inline Scalar run()
+ {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ return Scalar(random<RealScalar>(), random<RealScalar>());
+ }
+};
+template<typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y)
+{
+ return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
+}
+template<typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
+{
+ return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
+}
+#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG)
+#define EIGEN_USE_STD_FPCLASSIFY 1
+#else
+#define EIGEN_USE_STD_FPCLASSIFY 0
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isnan_impl(const T&) { return false; }
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isinf_impl(const T&) { return false; }
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isfinite_impl(const T&) { return true; }
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isfinite_impl(const T& x)
+{
+ #ifdef __CUDA_ARCH__
+ return (::isfinite)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
+ using std::isfinite;
+ return isfinite EIGEN_NOT_A_MACRO (x);
+ #else
+ return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
+ #endif
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isinf_impl(const T& x)
+{
+ #ifdef __CUDA_ARCH__
+ return (::isinf)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
+ using std::isinf;
+ return isinf EIGEN_NOT_A_MACRO (x);
+ #else
+ return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
+ #endif
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isnan_impl(const T& x)
+{
+ #ifdef __CUDA_ARCH__
+ return (::isnan)(x);
+ #elif EIGEN_USE_STD_FPCLASSIFY
+ using std::isnan;
+ return isnan EIGEN_NOT_A_MACRO (x);
+ #else
+ return x != x;
+ #endif
+}
+#if (!EIGEN_USE_STD_FPCLASSIFY)
+#if EIGEN_COMP_MSVC
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
+{
+ return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF;
+}
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); }
+#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC)
+#if EIGEN_GNUC_AT_LEAST(5,0)
+ #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only")))
+#else
+ #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only")))
+#endif
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }
+#undef EIGEN_TMP_NOOPT_ATTRIB
+#endif
+#endif
+template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
+template<typename T> T generic_fast_tanh_float(const T& a_x);
+}
+namespace numext {
+#ifndef __CUDA_ARCH__
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+ EIGEN_USING_STD_MATH(min);
+ return min EIGEN_NOT_A_MACRO (x,y);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+ EIGEN_USING_STD_MATH(max);
+ return max EIGEN_NOT_A_MACRO (x,y);
+}
+#else
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+ return y < x ? y : x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
+{
+ return fminf(x, y);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+ return x < y ? y : x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
+{
+ return fmaxf(x, y);
+}
+#endif
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
+{
+ return internal::real_ref_impl<Scalar>::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
+{
+ return internal::imag_ref_impl<Scalar>::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
+{
+ return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
+}
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float log1p(const float &x) { return ::log1pf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double log1p(const double &x) { return ::log1p(x); }
+#endif
+template<typename ScalarX,typename ScalarY>
+EIGEN_DEVICE_FUNC
+inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
+{
+ return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
+}
+template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
+template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); }
+template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+T (floor)(const T& x)
+{
+ EIGEN_USING_STD_MATH(floor);
+ return floor(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float floor(const float &x) { return ::floorf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double floor(const double &x) { return ::floor(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC
+T (ceil)(const T& x)
+{
+ EIGEN_USING_STD_MATH(ceil);
+ return ceil(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float ceil(const float &x) { return ::ceilf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double ceil(const double &x) { return ::ceil(x); }
+#endif
+inline int log2(int x)
+{
+ eigen_assert(x>=0);
+ unsigned int v(x);
+ static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return table[(v * 0x07C4ACDDU) >> 27];
+}
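+// Branch-free floor(log2(x)): the shift cascade smears the top set bit downward,
+// turning any x in [2^k, 2^(k+1)) into 2^(k+1)-1; multiplying by the de
+// Bruijn-style constant 0x07C4ACDD then makes the top five bits a perfect hash of
+// k, which the 32-entry table inverts.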
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sqrt(const T &x)
+{
+ EIGEN_USING_STD_MATH(sqrt);
+ return sqrt(x);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T log(const T &x) {
+ EIGEN_USING_STD_MATH(log);
+ return log(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float log(const float &x) { return ::logf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double log(const double &x) { return ::log(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
+abs(const T &x) {
+ EIGEN_USING_STD_MATH(abs);
+ return abs(x);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
+abs(const T &x) {
+ return x;
+}
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
+EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
+#endif
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const float &x) { return ::fabsf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const double &x) { return ::fabs(x); }
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const std::complex<float>& x) {
+ return ::hypotf(x.real(), x.imag());
+}
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const std::complex<double>& x) {
+ return ::hypot(x.real(), x.imag());
+}
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T exp(const T &x) {
+ EIGEN_USING_STD_MATH(exp);
+ return exp(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float exp(const float &x) { return ::expf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double exp(const double &x) { return ::exp(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T cos(const T &x) {
+ EIGEN_USING_STD_MATH(cos);
+ return cos(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float cos(const float &x) { return ::cosf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double cos(const double &x) { return ::cos(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sin(const T &x) {
+ EIGEN_USING_STD_MATH(sin);
+ return sin(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sin(const float &x) { return ::sinf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sin(const double &x) { return ::sin(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T tan(const T &x) {
+ EIGEN_USING_STD_MATH(tan);
+ return tan(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tan(const float &x) { return ::tanf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double tan(const double &x) { return ::tan(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T acos(const T &x) {
+ EIGEN_USING_STD_MATH(acos);
+ return acos(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float acos(const float &x) { return ::acosf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double acos(const double &x) { return ::acos(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T asin(const T &x) {
+ EIGEN_USING_STD_MATH(asin);
+ return asin(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float asin(const float &x) { return ::asinf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double asin(const double &x) { return ::asin(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T atan(const T &x) {
+ EIGEN_USING_STD_MATH(atan);
+ return atan(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float atan(const float &x) { return ::atanf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double atan(const double &x) { return ::atan(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T cosh(const T &x) {
+ EIGEN_USING_STD_MATH(cosh);
+ return cosh(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float cosh(const float &x) { return ::coshf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double cosh(const double &x) { return ::cosh(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sinh(const T &x) {
+ EIGEN_USING_STD_MATH(sinh);
+ return sinh(x);
+}
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sinh(const float &x) { return ::sinhf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sinh(const double &x) { return ::sinh(x); }
+#endif
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T tanh(const T &x) {
+ EIGEN_USING_STD_MATH(tanh);
+ return tanh(x);
+}
+#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(float x) { return internal::generic_fast_tanh_float(x); }
+#endif
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(const float &x) { return ::tanhf(x); }
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double tanh(const double &x) { return ::tanh(x); }
+#endif
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T fmod(const T& a, const T& b) {
+ EIGEN_USING_STD_MATH(fmod);
+ return fmod(a, b);
+}
+#ifdef __CUDACC__
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float fmod(const float& a, const float& b) {
+ return ::fmodf(a, b);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double fmod(const double& a, const double& b) {
+ return ::fmod(a, b);
+}
+#endif
+}
+namespace internal {
+template<typename T>
+EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x)
+{
+ return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
+}
+template<typename T>
+EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
+{
+ return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
+}
+template<typename T>
+EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x)
+{
+ return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
+}
+template<typename Scalar,
+ bool IsComplex,
+ bool IsInteger>
+struct scalar_fuzzy_default_impl {};
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, false>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
+ {
+ return numext::abs(x) <= numext::abs(y) * prec;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ return x <= y || isApprox(x, y, prec);
+ }
+};
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
+ {
+ return x == Scalar(0);
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
+ {
+ return x == y;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
+ {
+ return x <= y;
+ }
+};
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, true, false>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
+ {
+ return numext::abs2(x) <= numext::abs2(y) * prec * prec;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+ {
+ return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
+ }
+};
+template<typename Scalar>
+struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
+inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
+}
+template<typename Scalar> EIGEN_DEVICE_FUNC
+inline bool isApprox(const Scalar& x, const Scalar& y,
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
+}
+template<typename Scalar> EIGEN_DEVICE_FUNC
+inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
+ const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+ return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
+}
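+// The fuzzy comparisons are relative: isApprox(x,y,p) tests
+// |x - y| <= min(|x|,|y|) * p (with squared norms in the complex case) and exact
+// equality for integers, where p defaults to NumTraits<Scalar>::dummy_precision().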
+template<> struct random_impl<bool>
+{
+ static inline bool run()
+ {
+ return random<int>(0,1)==0 ? false : true;
+ }
+};
+template<> struct scalar_fuzzy_impl<bool>
+{
+ typedef bool RealScalar;
+ template<typename OtherScalar> EIGEN_DEVICE_FUNC
+ static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
+ {
+ return !x;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApprox(bool x, bool y, bool)
+ {
+ return x == y;
+ }
+ EIGEN_DEVICE_FUNC
+ static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
+ {
+ return (!x) || y;
+ }
+};
+}
+}
+#endif
+// end #include "src/Core/MathFunctions.h"
+// #include "src/Core/GenericPacketMath.h"
+#ifndef EIGEN_GENERIC_PACKET_MATH_H
+#define EIGEN_GENERIC_PACKET_MATH_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_DEBUG_ALIGNED_LOAD
+#define EIGEN_DEBUG_ALIGNED_LOAD
+#endif
+#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
+#define EIGEN_DEBUG_UNALIGNED_LOAD
+#endif
+#ifndef EIGEN_DEBUG_ALIGNED_STORE
+#define EIGEN_DEBUG_ALIGNED_STORE
+#endif
+#ifndef EIGEN_DEBUG_UNALIGNED_STORE
+#define EIGEN_DEBUG_UNALIGNED_STORE
+#endif
+struct default_packet_traits
+{
+ enum {
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasNegate = 1,
+ HasAbs = 1,
+ HasArg = 0,
+ HasAbs2 = 1,
+ HasMin = 1,
+ HasMax = 1,
+ HasConj = 1,
+ HasSetLinear = 1,
+ HasBlend = 0,
+ HasDiv = 0,
+ HasSqrt = 0,
+ HasRsqrt = 0,
+ HasExp = 0,
+ HasLog = 0,
+ HasLog1p = 0,
+ HasLog10 = 0,
+ HasPow = 0,
+ HasSin = 0,
+ HasCos = 0,
+ HasTan = 0,
+ HasASin = 0,
+ HasACos = 0,
+ HasATan = 0,
+ HasSinh = 0,
+ HasCosh = 0,
+ HasTanh = 0,
+ HasLGamma = 0,
+ HasDiGamma = 0,
+ HasZeta = 0,
+ HasPolygamma = 0,
+ HasErf = 0,
+ HasErfc = 0,
+ HasIGamma = 0,
+ HasIGammac = 0,
+ HasBetaInc = 0,
+ HasRound = 0,
+ HasFloor = 0,
+ HasCeil = 0,
+ HasSign = 0
+ };
+};
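+// default_packet_traits is the feature matrix each SIMD back-end specializes: a
+// flag set to 1 advertises a vectorized kernel for the matching packet op
+// (HasSin -> psin, HasDiv -> pdiv, ...); anything left at 0 makes the evaluator
+// fall back to scalar code.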
+template<typename T> struct packet_traits : default_packet_traits
+{
+ typedef T type;
+ typedef T half;
+ enum {
+ Vectorizable = 0,
+ size = 1,
+ AlignedOnScalar = 0,
+ HasHalfPacket = 0
+ };
+ enum {
+ HasAdd = 0,
+ HasSub = 0,
+ HasMul = 0,
+ HasNegate = 0,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasConj = 0,
+ HasSetLinear = 0
+ };
+};
+template<typename T> struct packet_traits<const T> : packet_traits<T> { };
+template <typename Src, typename Tgt> struct type_casting_traits {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a) {
+ return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& ) {
+ return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& , const SrcPacket& , const SrcPacket& ) {
+ return static_cast<TgtPacket>(a);
+}
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+padd(const Packet& a,
+ const Packet& b) { return a+b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+psub(const Packet& a,
+ const Packet& b) { return a-b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pnegate(const Packet& a) { return -a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pconj(const Packet& a) { return numext::conj(a); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmul(const Packet& a,
+ const Packet& b) { return a*b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pdiv(const Packet& a,
+ const Packet& b) { return a/b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmin(const Packet& a,
+ const Packet& b) { return numext::mini(a, b); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmax(const Packet& a,
+ const Packet& b) { return numext::maxi(a, b); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pabs(const Packet& a) { using std::abs; return abs(a); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+parg(const Packet& a) { using numext::arg; return arg(a); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pand(const Packet& a, const Packet& b) { return a & b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+por(const Packet& a, const Packet& b) { return a | b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pxor(const Packet& a, const Packet& b) { return a ^ b; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pandnot(const Packet& a, const Packet& b) { return a & (!b); }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
+ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadquad(const typename unpacket_traits<Packet>::type* from)
+{ return pload1<Packet>(from); }
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
+ Packet& a0, Packet& a1, Packet& a2, Packet& a3)
+{
+ a0 = pload1<Packet>(a+0);
+ a1 = pload1<Packet>(a+1);
+ a2 = pload1<Packet>(a+2);
+ a3 = pload1<Packet>(a+3);
+}
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
+ Packet& a0, Packet& a1)
+{
+ a0 = pload1<Packet>(a+0);
+ a1 = pload1<Packet>(a+1);
+}
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
+plset(const typename unpacket_traits<Packet>::type& a) { return a; }
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
+{ (*to) = from; }
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
+{ (*to) = from; }
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index )
+ { return ploadu<Packet>(from); }
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index )
+ { pstore(to, from); }
+template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
+{
+#ifdef __CUDA_ARCH__
+#if defined(__LP64__)
+ asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
+#else
+ asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
+#endif
+#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
+ __builtin_prefetch(addr);
+#endif
+}
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+preduxp(const Packet* vecs) { return vecs[0]; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline
+typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
+predux_downto4(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
+{ return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
+{
+ return Packet(imag(a),real(a));
+}
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psin(const Packet& a) { using std::sin; return sin(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pcos(const Packet& a) { using std::cos; return cos(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptan(const Packet& a) { using std::tan; return tan(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pasin(const Packet& a) { using std::asin; return asin(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pacos(const Packet& a) { using std::acos; return acos(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan(const Packet& a) { using std::atan; return atan(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pexp(const Packet& a) { using std::exp; return exp(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog(const Packet& a) { using std::log; return log(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog1p(const Packet& a) { return numext::log1p(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog10(const Packet& a) { using std::log10; return log10(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet prsqrt(const Packet& a) {
+ return pdiv(pset1<Packet>(1), psqrt(a));
+}
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pround(const Packet& a) { using numext::round; return round(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
+template<typename Packet>
+inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
+{
+ pstore(to, pset1<Packet>(a));
+}
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmadd(const Packet& a,
+ const Packet& b,
+ const Packet& c)
+{ return padd(pmul(a, b),c); }
+template<typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
+{
+ if(Alignment >= unpacket_traits<Packet>::alignment)
+ return pload<Packet>(from);
+ else
+ return ploadu<Packet>(from);
+}
+template<typename Scalar, typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
+{
+ if(Alignment >= unpacket_traits<Packet>::alignment)
+ pstore(to, from);
+ else
+ pstoreu(to, from);
+}
+template<typename Packet, int LoadMode>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
+{
+ return ploadt<Packet, LoadMode>(from);
+}
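+// ploadt/pstoret resolve the aligned-vs-unaligned choice at compile time:
+// Alignment is a template constant, so the branch folds away and every call site
+// compiles down to exactly one load or store flavour.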
+template<int Offset,typename PacketType>
+struct palign_impl
+{
+ static inline void run(PacketType&, const PacketType&) {}
+};
+template<int Offset,typename PacketType>
+inline void palign(PacketType& first, const PacketType& second)
+{
+ palign_impl<Offset,PacketType>::run(first,second);
+}
+#ifndef __CUDACC__
+template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
+{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
+template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
+{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
+#endif
+template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
+ Packet packet[N];
+};
+template<typename Packet> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet,1>& ) {
+}
+template <size_t N> struct Selector {
+ bool select[N];
+};
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
+ return ifPacket.select[0] ? thenPacket : elsePacket;
+}
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ Selector<unpacket_traits<Packet>::size> mask;
+ mask.select[0] = true;
+ for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
+ mask.select[i] = false;
+ return pblend(mask, pset1<Packet>(b), a);
+}
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ Selector<unpacket_traits<Packet>::size> mask;
+ for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
+ mask.select[i] = false;
+ mask.select[unpacket_traits<Packet>::size-1] = true;
+ return pblend(mask, pset1<Packet>(b), a);
+}
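+// pinsertfirst/pinsertlast are written as a pblend with a one-hot Selector: for a
+// 4-lane packet, pinsertfirst blends with mask {true,false,false,false}, replacing
+// only lane 0 of a with the broadcast scalar b. Back-ends override pblend with
+// native blend instructions where available.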
+}
+}
+#endif
+// end #include "src/Core/GenericPacketMath.h"
+#if defined EIGEN_VECTORIZE_AVX512
+// #include "src/Core/arch/SSE/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_SSE_H
+#define EIGEN_PACKET_MATH_SSE_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
+#endif
+#endif
+#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
+template<typename T>
+struct eigen_packet_wrapper
+{
+ EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
+ EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
+ m_val = v;
+ return *this;
+ }
+ T m_val;
+};
+typedef eigen_packet_wrapper<__m128> Packet4f;
+typedef eigen_packet_wrapper<__m128i> Packet4i;
+typedef eigen_packet_wrapper<__m128d> Packet2d;
+#else
+typedef __m128 Packet4f;
+typedef __m128i Packet4i;
+typedef __m128d Packet2d;
+#endif
+template<> struct is_arithmetic<__m128> { enum { value = true }; };
+template<> struct is_arithmetic<__m128i> { enum { value = true }; };
+template<> struct is_arithmetic<__m128d> { enum { value = true }; };
+#define vec4f_swizzle1(v,p,q,r,s) \
+ (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
+#define vec4i_swizzle1(v,p,q,r,s) \
+ (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec2d_swizzle1(v,p,q) \
+ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
+#define vec4f_swizzle2(a,b,p,q,r,s) \
+ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec4i_swizzle2(a,b,p,q,r,s) \
+ (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
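+// Swizzle helpers: swizzle1 permutes the lanes of one register (result lane i
+// comes from source lane p/q/r/s); swizzle2 takes lanes p,q from a and r,s from b.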
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ const Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
+ HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ ,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ ,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
+ };
+};
+#endif
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasBlend = 1
+ };
+};
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+#if EIGEN_COMP_MSVC==1500
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
+#endif
+#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
+template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
+ return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+ return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
+{
+ return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_mullo_epi32(a,b);
+#else
+ return vec4i_swizzle1(
+ vec4i_swizzle2(
+ _mm_mul_epu32(a,b),
+ _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
+ vec4i_swizzle1(b,1,0,3,2)),
+ 0,2,0,2),
+ 0,2,1,3);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
+ Packet4i mask = _mm_cmplt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
+ Packet4i mask = _mm_cmpgt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+#if EIGEN_COMP_MSVC
+ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ #if (EIGEN_COMP_MSVC==1600)
+ __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
+ res = _mm_loadh_pi(res, (const __m64*)(from+2));
+ return res;
+ #else
+ return _mm_loadu_ps(from);
+ #endif
+ }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_ps(from);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_pd(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+}
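+// ploaddup loads size/2 scalars and duplicates each one pairwise,
+// e.g. {a0,a0,a1,a1} for the four-lane packets.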
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{ return pset1<Packet2d>(from[0]); }
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i tmp;
+ tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
+ return vec4i_swizzle1(tmp, 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
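+// Strided gather/scatter: SSE has no gather instruction, so these move one
+// scalar per stride step.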
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ return _mm_set_pd(from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ to[stride*0] = _mm_cvtss_f32(from);
+ to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+ to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+ to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ to[stride*0] = _mm_cvtsd_f64(from);
+ to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ to[stride*0] = _mm_cvtsi128_si32(from);
+ to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+ to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+ to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
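+// pstore1 broadcasts the scalar a and stores the full packet to an aligned address.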
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
+{
+ Packet4f pa = _mm_set_ss(a);
+ pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
+{
+ Packet2d pa = _mm_set_sd(a);
+ pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
+}
+#ifndef EIGEN_VECTORIZE_AVX
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif EIGEN_COMP_MSVC_STRICT
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+ #ifdef EIGEN_VECTORIZE_SSSE3
+ return _mm_abs_epi32(a);
+ #else
+ Packet4i aux = _mm_srai_epi32(a,31);
+ return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+ #endif
+}
+#ifndef __AVX__
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec4f_swizzle1(a3, 0,0,0,0);
+ a1 = vec4f_swizzle1(a3, 1,1,1,1);
+ a2 = vec4f_swizzle1(a3, 2,2,2,2);
+ a3 = vec4f_swizzle1(a3, 3,3,3,3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+#ifdef EIGEN_VECTORIZE_SSE3
+ a0 = _mm_loaddup_pd(a+0);
+ a1 = _mm_loaddup_pd(a+1);
+ a2 = _mm_loaddup_pd(a+2);
+ a3 = _mm_loaddup_pd(a+3);
+#else
+ a1 = pload<Packet2d>(a);
+ a0 = vec2d_swizzle1(a1, 0,0);
+ a1 = vec2d_swizzle1(a1, 1,1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec2d_swizzle1(a3, 0,0);
+ a3 = vec2d_swizzle1(a3, 1,1);
+#endif
+}
+#endif
+EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
+{
+ vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
+ vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
+ vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
+ vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
+}
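+// Reductions: preduxp sums the input packets into one packet (lane i receives
+// the total of vecs[i]); predux reduces a single packet to its scalar sum.
+// SSE3 can use horizontal adds, the fallback unpacks and adds.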
+#ifdef EIGEN_VECTORIZE_SSE3
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_hadd_pd(vecs[0], vecs[1]);
+}
+#else
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ Packet4f tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
+ tmp0 = _mm_add_ps(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
+ tmp1 = _mm_add_ps(tmp1, tmp2);
+ tmp2 = _mm_movehl_ps(tmp1, tmp0);
+ tmp0 = _mm_movelh_ps(tmp0, tmp1);
+ return _mm_add_ps(tmp0, tmp2);
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
+}
+#endif
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+}
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp0 = _mm_hadd_epi32(a,a);
+ return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+ return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+ tmp0 = _mm_add_epi32(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+ tmp1 = _mm_add_epi32(tmp1, tmp2);
+ tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
+ tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
+ return _mm_add_epi32(tmp0, tmp2);
+}
+#endif
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ return (aux[0] * aux[1]) * (aux[2] * aux[3]);
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+ return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+ int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+ return aux0<aux2 ? aux0 : aux2;
+#endif
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+ return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+ int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+ return aux0>aux2 ? aux0 : aux2;
+#endif
+}
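+// With SSSE3, palignr implements palign in a single instruction; the
+// fallback below emulates it with moves and shuffles.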
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset!=0)
+ first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset!=0)
+ first = _mm_alignr_epi8(second,first, Offset*4);
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset==1)
+ first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
+ }
+};
+#else
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_move_ss(first,second);
+ first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
+ }
+ else if (Offset==2)
+ {
+ first = _mm_movehl_ps(first,first);
+ first = _mm_movelh_ps(first,second);
+ }
+ else if (Offset==3)
+ {
+ first = _mm_move_ss(first,second);
+ first = _mm_shuffle_ps(first,second,0x93);
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ first = _mm_shuffle_epi32(first,0x39);
+ }
+ else if (Offset==2)
+ {
+ first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
+ first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ }
+ else if (Offset==3)
+ {
+ first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
+ first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
+ }
+ }
+};
+#endif
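+// In-register transposes: 4x4 for float/int and 2x2 for double.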
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[1] = tmp;
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
+ __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
+ __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
+ __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
+ kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
+ kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
+ kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
+ kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128i false_mask = _mm_cmpeq_epi32(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+ const __m128 zero = _mm_setzero_ps();
+ const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128 false_mask = _mm_cmpeq_ps(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ const __m128d zero = _mm_setzero_pd();
+ const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
+ __m128d false_mask = _mm_cmpeq_pd(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),1);
+#else
+ return _mm_move_ss(a, _mm_load_ss(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),1);
+#else
+ return _mm_move_sd(a, _mm_load_sd(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+ return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+ return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
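+// Scalar pmadd overloads map to the C99 fused multiply-add when FMA is enabled.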
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
+ return ::fmaf(a,b,c);
+}
+template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
+ return ::fma(a,b,c);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/PacketMath.h"
+// #include "src/Core/arch/AVX/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_AVX_H
+#define EIGEN_PACKET_MATH_AVX_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
+typedef __m256 Packet8f;
+typedef __m256i Packet8i;
+typedef __m256d Packet4d;
+template<> struct is_arithmetic<__m256> { enum { value = true }; };
+template<> struct is_arithmetic<__m256i> { enum { value = true }; };
+template<> struct is_arithmetic<__m256d> { enum { value = true }; };
+#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
+ const Packet8f p8f_##NAME = pset1<Packet8f>(X)
+#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
+ const Packet4d p4d_##NAME = pset1<Packet4d>(X)
+#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
+ const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
+#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
+ const Packet8i p8i_##NAME = pset1<Packet8i>(X)
+#ifndef EIGEN_VECTORIZE_AVX512
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet8f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=8,
+ HasHalfPacket = 1,
+ HasDiv = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = 0,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
+ HasBlend = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet4d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 1,
+ HasDiv = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasBlend = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+ };
+};
+#endif
+template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
+template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
+template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
+template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
+template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
+template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
+template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
+{
+ return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
+{
+ return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& , const Packet8i& )
+{ eigen_assert(false && "packet integer division is not supported by AVX");
+ return pset1<Packet8i>(0);
+}
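+// On strict GCC and clang before 3.8, inline asm forces a single vfmadd231
+// and avoids the extra register moves those compilers would otherwise emit.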
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
+ Packet8f res = c;
+ __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_ps(a,b,c);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
+ Packet4d res = c;
+ __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_pd(a,b,c);
+#endif
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
+template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
+template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
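+// ploaddup repeats four floats pairwise across the eight lanes; ploadquad
+// broadcasts two floats four times each (low and high 128-bit halves).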
+template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
+{
+ Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
+ tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
+ return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
+}
+template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
+{
+ Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
+ return _mm256_permute_pd(tmp, 3<<2);
+}
+template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
+{
+ Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
+ return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
+{
+ return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
+ from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
+{
+ return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
+{
+ __m128 low = _mm256_extractf128_ps(from, 0);
+ to[stride*0] = _mm_cvtss_f32(low);
+ to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
+ to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
+ to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
+ __m128 high = _mm256_extractf128_ps(from, 1);
+ to[stride*4] = _mm_cvtss_f32(high);
+ to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
+ to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
+ to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
+{
+ __m128d low = _mm256_extractf128_pd(from, 0);
+ to[stride*0] = _mm_cvtsd_f64(low);
+ to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
+ __m128d high = _mm256_extractf128_pd(from, 1);
+ to[stride*2] = _mm_cvtsd_f64(high);
+ to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
+{
+ Packet8f pa = pset1<Packet8f>(a);
+ pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
+{
+ Packet4d pa = pset1<Packet4d>(a);
+ pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
+{
+ Packet8i pa = pset1<Packet8i>(a);
+ pstore(to, pa);
+}
+#ifndef EIGEN_VECTORIZE_AVX512
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
+ return _mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
+template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
+ return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
+}
+template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
+ return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
+}
+template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
+{
+ __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
+ return _mm256_permute2f128_ps(tmp, tmp, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
+{
+ __m256d tmp = _mm256_shuffle_pd(a,a,5);
+ return _mm256_permute2f128_pd(tmp, tmp, 1);
+ // Unreachable alternative kept from upstream, commented out:
+ // __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
+ // return _mm256_permute_pd(swap_halves,5);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
+{
+ const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm256_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
+{
+ const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm256_and_pd(a,mask);
+}
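+// AVX preduxp: lane i of the result is the horizontal sum of vecs[i],
+// computed with hadds, cross-lane permutes and blends.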
+template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
+{
+ __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
+ __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
+ __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
+ __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
+ __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ __m256 sum1 = _mm256_add_ps(perm1, hsum5);
+ __m256 sum2 = _mm256_add_ps(perm2, hsum6);
+ __m256 sum3 = _mm256_add_ps(perm3, hsum7);
+ __m256 sum4 = _mm256_add_ps(perm4, hsum8);
+ __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
+ return final;
+}
+template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
+{
+ Packet4d tmp0, tmp1;
+ tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ return _mm256_blend_pd(tmp0, tmp1, 0xC);
+}
+template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
+{
+ return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
+{
+ return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
+}
+template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
+{
+ return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
+}
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp;
+ tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp;
+ tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
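+// AVX has no cross-lane palignr, so palign is assembled from blends,
+// in-lane permutes and 128-bit lane swaps.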
+template<int Offset>
+struct palign_impl<Offset,Packet8f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm256_blend_ps(first, second, 1);
+ Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+ Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+ first = _mm256_blend_ps(tmp1, tmp2, 0x88);
+ }
+ else if (Offset==2)
+ {
+ first = _mm256_blend_ps(first, second, 3);
+ Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+ Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+ first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
+ }
+ else if (Offset==3)
+ {
+ first = _mm256_blend_ps(first, second, 7);
+ Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+ Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+ first = _mm256_blend_ps(tmp1, tmp2, 0xee);
+ }
+ else if (Offset==4)
+ {
+ first = _mm256_blend_ps(first, second, 15);
+ Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
+ Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+ first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
+ }
+ else if (Offset==5)
+ {
+ first = _mm256_blend_ps(first, second, 31);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0x88);
+ }
+ else if (Offset==6)
+ {
+ first = _mm256_blend_ps(first, second, 63);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0xcc);
+ }
+ else if (Offset==7)
+ {
+ first = _mm256_blend_ps(first, second, 127);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0xee);
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm256_blend_pd(first, second, 1);
+ __m256d tmp = _mm256_permute_pd(first, 5);
+ first = _mm256_permute2f128_pd(tmp, tmp, 1);
+ first = _mm256_blend_pd(tmp, first, 0xA);
+ }
+ else if (Offset==2)
+ {
+ first = _mm256_blend_pd(first, second, 3);
+ first = _mm256_permute2f128_pd(first, first, 1);
+ }
+ else if (Offset==3)
+ {
+ first = _mm256_blend_pd(first, second, 7);
+ __m256d tmp = _mm256_permute_pd(first, 5);
+ first = _mm256_permute2f128_pd(tmp, tmp, 1);
+ first = _mm256_blend_pd(tmp, first, 5);
+ }
+ }
+};
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,8>& kernel) {
+ __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
+ __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
+ __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
+ __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
+ __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+ __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+ __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+ __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+ __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
+ __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
+ __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
+ __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
+ kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
+ kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
+ kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
+ kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
+ kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
+ kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
+ kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
+ kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,4>& kernel) {
+ __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+ __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+ __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+ __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+ kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
+ kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
+ kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
+ kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4d,4>& kernel) {
+ __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
+ __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
+ __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
+ __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
+ kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
+ kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
+ kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
+ kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
+ const __m256 zero = _mm256_setzero_ps();
+ const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
+ return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
+ const __m256d zero = _mm256_setzero_pd();
+ const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
+ return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
+}
+template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/PacketMath.h"
+// #include "src/Core/arch/AVX512/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_AVX512_H
+#define EIGEN_PACKET_MATH_AVX512_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
+typedef __m512 Packet16f;
+typedef __m512i Packet16i;
+typedef __m512d Packet8d;
+template <>
+struct is_arithmetic<__m512> {
+ enum { value = true };
+};
+template <>
+struct is_arithmetic<__m512i> {
+ enum { value = true };
+};
+template <>
+struct is_arithmetic<__m512d> {
+ enum { value = true };
+};
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet16f type;
+ typedef Packet8f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 16,
+ HasHalfPacket = 1,
+#if EIGEN_GNUC_AT_LEAST(5, 3)
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ HasLog = 1,
+#endif
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+#endif
+ HasDiv = 1
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet8d type;
+ typedef Packet4d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 8,
+ HasHalfPacket = 1,
+#if EIGEN_GNUC_AT_LEAST(5, 3)
+ HasSqrt = 1,
+ HasRsqrt = EIGEN_FAST_MATH,
+#endif
+ HasDiv = 1
+ };
+};
+template <>
+struct unpacket_traits<Packet16f> {
+ typedef float type;
+ typedef Packet8f half;
+ enum { size = 16, alignment=Aligned64 };
+};
+template <>
+struct unpacket_traits<Packet8d> {
+ typedef double type;
+ typedef Packet4d half;
+ enum { size = 8, alignment=Aligned64 };
+};
+template <>
+struct unpacket_traits<Packet16i> {
+ typedef int type;
+ typedef Packet8i half;
+ enum { size = 16, alignment=Aligned64 };
+};
+template <>
+EIGEN_STRONG_INLINE Packet16f pset1<Packet16f>(const float& from) {
+ return _mm512_set1_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pset1<Packet8d>(const double& from) {
+ return _mm512_set1_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pset1<Packet16i>(const int& from) {
+ return _mm512_set1_epi32(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
+ return _mm512_broadcastss_ps(_mm_load_ps1(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pload1<Packet8d>(const double* from) {
+ return _mm512_broadcastsd_pd(_mm_load_pd1(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f plset<Packet16f>(const float& a) {
+ return _mm512_add_ps(
+ _mm512_set1_ps(a),
+ _mm512_set_ps(15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f,
+ 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d plset<Packet8d>(const double& a) {
+ return _mm512_add_pd(_mm512_set1_pd(a),
+ _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_add_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d padd<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_add_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f psub<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_sub_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d psub<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_sub_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
+ return _mm512_sub_ps(_mm512_set1_ps(0.0), a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {
+ return _mm512_sub_pd(_mm512_set1_pd(0.0), a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pconj(const Packet16f& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pconj(const Packet8d& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pconj(const Packet16i& a) {
+ return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pmul<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_mul_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmul<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_mul_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pdiv<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_div_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pdiv<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_div_pd(a, b);
+}
+#ifdef __FMA__
+template <>
+EIGEN_STRONG_INLINE Packet16f pmadd(const Packet16f& a, const Packet16f& b,
+ const Packet16f& c) {
+ return _mm512_fmadd_ps(a, b, c);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b,
+ const Packet8d& c) {
+ return _mm512_fmadd_pd(a, b, c);
+}
+#endif
+template <>
+EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_min_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_min_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+ return _mm512_max_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+ return _mm512_max_pd(a, b);
+}
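+// Full-width bitwise ops on float/double registers require AVX512DQ. Without
+// it, pand/por/pxor/pandnot fall back to splitting each packet into 128-bit
+// (float) or 256-bit (double) lanes, applying the SSE/AVX intrinsic per lane,
+// and reassembling the result.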
+template <>
+EIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_and_ps(a, b);
+#else
+ Packet16f res = _mm512_undefined_ps();
+ Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
+ res = _mm512_insertf32x4(res, _mm_and_ps(lane0_a, lane0_b), 0);
+ Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
+ res = _mm512_insertf32x4(res, _mm_and_ps(lane1_a, lane1_b), 1);
+ Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
+ res = _mm512_insertf32x4(res, _mm_and_ps(lane2_a, lane2_b), 2);
+ Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
+ Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
+ res = _mm512_insertf32x4(res, _mm_and_ps(lane3_a, lane3_b), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pand<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_and_pd(a, b);
+#else
+ Packet8d res = _mm512_undefined_pd();
+ Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+ res = _mm512_insertf64x4(res, _mm256_and_pd(lane0_a, lane0_b), 0);
+ Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+ Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+ res = _mm512_insertf64x4(res, _mm256_and_pd(lane1_a, lane1_b), 1);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_or_ps(a, b);
+#else
+ Packet16f res = _mm512_undefined_ps();
+ Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
+ res = _mm512_insertf32x4(res, _mm_or_ps(lane0_a, lane0_b), 0);
+ Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
+ res = _mm512_insertf32x4(res, _mm_or_ps(lane1_a, lane1_b), 1);
+ Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
+ res = _mm512_insertf32x4(res, _mm_or_ps(lane2_a, lane2_b), 2);
+ Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
+ Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
+ res = _mm512_insertf32x4(res, _mm_or_ps(lane3_a, lane3_b), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d por<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_or_pd(a, b);
+#else
+ Packet8d res = _mm512_undefined_pd();
+ Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+ res = _mm512_insertf64x4(res, _mm256_or_pd(lane0_a, lane0_b), 0);
+ Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+ Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+ res = _mm512_insertf64x4(res, _mm256_or_pd(lane1_a, lane1_b), 1);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_xor_ps(a, b);
+#else
+ Packet16f res = _mm512_undefined_ps();
+ Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
+ res = _mm512_insertf32x4(res, _mm_xor_ps(lane0_a, lane0_b), 0);
+ Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
+ res = _mm512_insertf32x4(res, _mm_xor_ps(lane1_a, lane1_b), 1);
+ Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
+ res = _mm512_insertf32x4(res, _mm_xor_ps(lane2_a, lane2_b), 2);
+ Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
+ Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
+ res = _mm512_insertf32x4(res, _mm_xor_ps(lane3_a, lane3_b), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pxor<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_xor_pd(a, b);
+#else
+ Packet8d res = _mm512_undefined_pd();
+ Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+ res = _mm512_insertf64x4(res, _mm256_xor_pd(lane0_a, lane0_b), 0);
+ Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+ Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+ res = _mm512_insertf64x4(res, _mm256_xor_pd(lane1_a, lane1_b), 1);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a,
+ const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_andnot_ps(a, b);
+#else
+ Packet16f res = _mm512_undefined_ps();
+ Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
+ res = _mm512_insertf32x4(res, _mm_andnot_ps(lane0_a, lane0_b), 0);
+ Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
+ res = _mm512_insertf32x4(res, _mm_andnot_ps(lane1_a, lane1_b), 1);
+ Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
+ res = _mm512_insertf32x4(res, _mm_andnot_ps(lane2_a, lane2_b), 2);
+ Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
+ Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
+ res = _mm512_insertf32x4(res, _mm_andnot_ps(lane3_a, lane3_b), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pandnot<Packet8d>(const Packet8d& a,
+ const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ return _mm512_andnot_pd(a, b);
+#else
+ Packet8d res = _mm512_undefined_pd();
+ Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+ res = _mm512_insertf64x4(res, _mm256_andnot_pd(lane0_a, lane0_b), 0);
+ Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+ Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+ res = _mm512_insertf64x4(res, _mm256_andnot_pd(lane1_a, lane1_b), 1);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) {
+ EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pload<Packet8d>(const double* from) {
+ EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pload<Packet16i>(const int* from) {
+ EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+ reinterpret_cast<const __m512i*>(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d ploadu<Packet8d>(const double* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+ reinterpret_cast<const __m512i*>(from));
+}
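+// ploaddup loads size/2 scalars and duplicates each one in place:
+// {a0,a1,a2,...} -> {a0,a0,a1,a1,a2,a2,...}.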
+template <>
+EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
+ Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from);
+ lane0 = _mm256_blend_ps(
+ lane0, _mm256_castps128_ps256(_mm_permute_ps(
+ _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))),
+ 15);
+ lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2));
+ Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4));
+ lane1 = _mm256_blend_ps(
+ lane1, _mm256_castps128_ps256(_mm_permute_ps(
+ _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))),
+ 15);
+ lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2));
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ Packet16f res = _mm512_undefined_ps();
+  res = _mm512_insertf32x8(res, lane0, 0);
+  res = _mm512_insertf32x8(res, lane1, 1);
+  return res;
+#else
+ Packet16f res = _mm512_undefined_ps();
+ res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0);
+ res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1);
+ res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2);
+ res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3);
+ return res;
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
+ Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from);
+ lane0 = _mm256_permute_pd(lane0, 3 << 2);
+ Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2));
+ lane1 = _mm256_permute_pd(lane1, 3 << 2);
+ Packet8d res = _mm512_undefined_pd();
+ res = _mm512_insertf64x4(res, lane0, 0);
+ return _mm512_insertf64x4(res, lane1, 1);
+}
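+// ploadquad loads size/4 scalars and replicates each one four times.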
+template <>
+EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) {
+ Packet16f tmp = _mm512_undefined_ps();
+ tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from), 0);
+ tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 1), 1);
+ tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 2), 2);
+ tmp = _mm512_insertf32x4(tmp, _mm_load_ps1(from + 3), 3);
+ return tmp;
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) {
+ Packet8d tmp = _mm512_undefined_pd();
+ Packet2d tmp0 = _mm_load_pd1(from);
+ Packet2d tmp1 = _mm_load_pd1(from + 1);
+ Packet4d lane0 = _mm256_broadcastsd_pd(tmp0);
+ Packet4d lane1 = _mm256_broadcastsd_pd(tmp1);
+ tmp = _mm512_insertf64x4(tmp, lane0, 0);
+ return _mm512_insertf64x4(tmp, lane1, 1);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet16f& from) {
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ps(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet8d& from) {
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_store_pd(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet16i& from) {
+ EIGEN_DEBUG_ALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to),
+ from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ps(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet8d& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_pd(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {
+ EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+ reinterpret_cast<__m512i*>(to), from);
+}
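+// Strided gather/scatter: materialize the index vector stride * {0,1,...,N-1}
+// and hand it to the hardware gather/scatter instruction with the element size
+// as scale.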
+template <>
+EIGEN_DEVICE_FUNC inline Packet16f pgather<float, Packet16f>(const float* from,
+ Index stride) {
+ Packet16i stride_vector = _mm512_set1_epi32(stride);
+ Packet16i stride_multiplier =
+ _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
+ return _mm512_i32gather_ps(indices, from, 4);
+}
+template <>
+EIGEN_DEVICE_FUNC inline Packet8d pgather<double, Packet8d>(const double* from,
+ Index stride) {
+ Packet8i stride_vector = _mm256_set1_epi32(stride);
+ Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+ Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
+ return _mm512_i32gather_pd(indices, from, 8);
+}
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<float, Packet16f>(float* to,
+ const Packet16f& from,
+ Index stride) {
+ Packet16i stride_vector = _mm512_set1_epi32(stride);
+ Packet16i stride_multiplier =
+ _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
+ _mm512_i32scatter_ps(to, indices, from, 4);
+}
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<double, Packet8d>(double* to,
+ const Packet8d& from,
+ Index stride) {
+ Packet8i stride_vector = _mm256_set1_epi32(stride);
+ Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+ Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
+ _mm512_i32scatter_pd(to, indices, from, 8);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet16f>(float* to, const float& a) {
+ Packet16f pa = pset1<Packet16f>(a);
+ pstore(to, pa);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet8d>(double* to, const double& a) {
+ Packet8d pa = pset1<Packet8d>(a);
+ pstore(to, pa);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
+ Packet16i pa = pset1<Packet16i>(a);
+ pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
+ return _mm_cvtss_f32(_mm512_extractf32x4_ps(a, 0));
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet8d>(const Packet8d& a) {
+ return _mm_cvtsd_f64(_mm256_extractf128_pd(_mm512_extractf64x4_pd(a, 0), 0));
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet16i>(const Packet16i& a) {
+ return _mm_extract_epi32(_mm512_extracti32x4_epi32(a, 0), 0);
+}
+template<> EIGEN_STRONG_INLINE Packet16f preverse(const Packet16f& a)
+{
+ return _mm512_permutexvar_ps(_mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), a);
+}
+template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
+{
+ return _mm512_permutexvar_pd(_mm512_set_epi32(0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7), a);
+}
+template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
+{
+ return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
+ return (__m512d)_mm512_and_si512((__m512i)a,
+ _mm512_set1_epi64(0x7fffffffffffffff));
+}
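+// Helpers to split a 512-bit float packet into two 256-bit halves and to merge
+// them back. With AVX512DQ a single extract/insert per half suffices;
+// otherwise each half is assembled from two 128-bit lanes.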
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \
+  __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \
+  __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1)
+#else
+#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \
+ __m256 OUTPUT##_0 = _mm256_insertf128_ps( \
+ _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 0)), \
+ _mm512_extractf32x4_ps(INPUT, 1), 1); \
+ __m256 OUTPUT##_1 = _mm256_insertf128_ps( \
+ _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 2)), \
+ _mm512_extractf32x4_ps(INPUT, 3), 1);
+#endif
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \
+ OUTPUT = _mm512_insertf32x8(OUTPUT, INPUTA, 0); \
+ OUTPUT = _mm512_insertf32x8(OUTPUT, INPUTB, 1);
+#else
+#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \
+ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 0), 0); \
+ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 1), 1); \
+ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \
+ OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3);
+#endif
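+// preduxp reduces each of the 16 input packets to its horizontal sum and packs
+// the 16 partial sums into one output packet, working on 256-bit halves with
+// hadd/permute/blend.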
+template<> EIGEN_STRONG_INLINE Packet16f preduxp<Packet16f>(const Packet16f* vecs)
+{
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[0], vecs0);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[1], vecs1);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[2], vecs2);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[3], vecs3);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[4], vecs4);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[5], vecs5);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[6], vecs6);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[7], vecs7);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[8], vecs8);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[9], vecs9);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[10], vecs10);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[11], vecs11);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[12], vecs12);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[13], vecs13);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[14], vecs14);
+ EIGEN_EXTRACT_8f_FROM_16f(vecs[15], vecs15);
+ __m256 hsum1 = _mm256_hadd_ps(vecs0_0, vecs1_0);
+ __m256 hsum2 = _mm256_hadd_ps(vecs2_0, vecs3_0);
+ __m256 hsum3 = _mm256_hadd_ps(vecs4_0, vecs5_0);
+ __m256 hsum4 = _mm256_hadd_ps(vecs6_0, vecs7_0);
+ __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ __m256 sum1 = _mm256_add_ps(perm1, hsum5);
+ __m256 sum2 = _mm256_add_ps(perm2, hsum6);
+ __m256 sum3 = _mm256_add_ps(perm3, hsum7);
+ __m256 sum4 = _mm256_add_ps(perm4, hsum8);
+ __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
+ hsum1 = _mm256_hadd_ps(vecs0_1, vecs1_1);
+ hsum2 = _mm256_hadd_ps(vecs2_1, vecs3_1);
+ hsum3 = _mm256_hadd_ps(vecs4_1, vecs5_1);
+ hsum4 = _mm256_hadd_ps(vecs6_1, vecs7_1);
+ hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ sum1 = _mm256_add_ps(perm1, hsum5);
+ sum2 = _mm256_add_ps(perm2, hsum6);
+ sum3 = _mm256_add_ps(perm3, hsum7);
+ sum4 = _mm256_add_ps(perm4, hsum8);
+ blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0));
+ hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0);
+ hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0);
+ hsum3 = _mm256_hadd_ps(vecs12_0, vecs13_0);
+ hsum4 = _mm256_hadd_ps(vecs14_0, vecs15_0);
+ hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ sum1 = _mm256_add_ps(perm1, hsum5);
+ sum2 = _mm256_add_ps(perm2, hsum6);
+ sum3 = _mm256_add_ps(perm3, hsum7);
+ sum4 = _mm256_add_ps(perm4, hsum8);
+ blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ __m256 final_1 = _mm256_blend_ps(blend1, blend2, 0xf0);
+ hsum1 = _mm256_hadd_ps(vecs8_1, vecs9_1);
+ hsum2 = _mm256_hadd_ps(vecs10_1, vecs11_1);
+ hsum3 = _mm256_hadd_ps(vecs12_1, vecs13_1);
+ hsum4 = _mm256_hadd_ps(vecs14_1, vecs15_1);
+ hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ sum1 = _mm256_add_ps(perm1, hsum5);
+ sum2 = _mm256_add_ps(perm2, hsum6);
+ sum3 = _mm256_add_ps(perm3, hsum7);
+ sum4 = _mm256_add_ps(perm4, hsum8);
+ blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0));
+ __m512 final_output;
+ EIGEN_INSERT_8f_INTO_16f(final_output, final, final_1);
+ return final_output;
+}
+template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs)
+{
+ Packet4d vecs0_0 = _mm512_extractf64x4_pd(vecs[0], 0);
+ Packet4d vecs0_1 = _mm512_extractf64x4_pd(vecs[0], 1);
+ Packet4d vecs1_0 = _mm512_extractf64x4_pd(vecs[1], 0);
+ Packet4d vecs1_1 = _mm512_extractf64x4_pd(vecs[1], 1);
+ Packet4d vecs2_0 = _mm512_extractf64x4_pd(vecs[2], 0);
+ Packet4d vecs2_1 = _mm512_extractf64x4_pd(vecs[2], 1);
+ Packet4d vecs3_0 = _mm512_extractf64x4_pd(vecs[3], 0);
+ Packet4d vecs3_1 = _mm512_extractf64x4_pd(vecs[3], 1);
+ Packet4d vecs4_0 = _mm512_extractf64x4_pd(vecs[4], 0);
+ Packet4d vecs4_1 = _mm512_extractf64x4_pd(vecs[4], 1);
+ Packet4d vecs5_0 = _mm512_extractf64x4_pd(vecs[5], 0);
+ Packet4d vecs5_1 = _mm512_extractf64x4_pd(vecs[5], 1);
+ Packet4d vecs6_0 = _mm512_extractf64x4_pd(vecs[6], 0);
+ Packet4d vecs6_1 = _mm512_extractf64x4_pd(vecs[6], 1);
+ Packet4d vecs7_0 = _mm512_extractf64x4_pd(vecs[7], 0);
+ Packet4d vecs7_1 = _mm512_extractf64x4_pd(vecs[7], 1);
+ Packet4d tmp0, tmp1;
+ tmp0 = _mm256_hadd_pd(vecs0_0, vecs1_0);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs2_0, vecs3_0);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ __m256d final_0 = _mm256_blend_pd(tmp0, tmp1, 0xC);
+ tmp0 = _mm256_hadd_pd(vecs0_1, vecs1_1);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC));
+ tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs6_0, vecs7_0);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ __m256d final_1 = _mm256_blend_pd(tmp0, tmp1, 0xC);
+ tmp0 = _mm256_hadd_pd(vecs4_1, vecs5_1);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC));
+  __m512d final_output = _mm512_undefined_pd();
+  final_output = _mm512_insertf64x4(final_output, final_0, 0);
+ return _mm512_insertf64x4(final_output, final_1, 1);
+}
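+// predux computes the horizontal sum of a single packet: the 128-bit lanes are
+// added together first, then the remaining 4-vector is collapsed with
+// horizontal adds and shuffles.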
+template <>
+EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
+#if 0
+ Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
+ Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
+ Packet8f sum = padd(lane0, lane1);
+  Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(sum, sum, 1));
+ tmp0 = _mm256_hadd_ps(tmp0, tmp0);
+ return pfirst(_mm256_hadd_ps(tmp0, tmp0));
+#else
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3));
+ sum = _mm_hadd_ps(sum, sum);
+ sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1));
+ return pfirst(sum);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d sum = padd(lane0, lane1);
+ Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1));
+ return pfirst(_mm256_hadd_pd(tmp0, tmp0));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
+ Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
+ return padd(lane0, lane1);
+#else
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f sum0 = padd(lane0, lane2);
+ Packet4f sum1 = padd(lane1, lane3);
+ return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d res = padd(lane0, lane1);
+ return res;
+}
+template <>
+EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
+#if 0
+ Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
+ Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
+ Packet8f res = pmul(lane0, lane1);
+ res = pmul(res, _mm256_permute2f128_ps(res, res, 1));
+  // Finish the reduction in the low 128-bit lane.
+  Packet4f res4 = _mm256_castps256_ps128(res);
+  res4 = pmul(res4, _mm_permute_ps(res4, _MM_SHUFFLE(0, 0, 3, 2)));
+  return pfirst(pmul(res4, _mm_permute_ps(res4, _MM_SHUFFLE(0, 0, 0, 1))));
+#else
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3));
+ res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+ return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d res = pmul(lane0, lane1);
+ res = pmul(res, _mm256_permute2f128_pd(res, res, 1));
+ return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));
+}
+template <>
+EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3));
+ res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+ return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+}
+template <>
+EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d res = _mm256_min_pd(lane0, lane1);
+ res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));
+ return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));
+}
+template <>
+EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {
+ Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+ Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+ Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+ Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+ Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3));
+ res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+ return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+}
+template <>
+EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
+ Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+ Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+ Packet4d res = _mm256_max_pd(lane0, lane1);
+ res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));
+ return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
+}
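+// palign_impl shifts `first` down by Offset elements and fills the freed tail
+// with the leading elements of `second`: two cross-lane permutes plus a mask
+// blend whose mask marks the positions taken from `second`.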
+template <int Offset>
+struct palign_impl<Offset, Packet16f> {
+ static EIGEN_STRONG_INLINE void run(Packet16f& first,
+ const Packet16f& second) {
+ if (Offset != 0) {
+ __m512i first_idx = _mm512_set_epi32(
+ Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
+ Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
+ Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
+ __m512i second_idx =
+ _mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
+ Offset - 5, Offset - 6, Offset - 7, Offset - 8,
+ Offset - 9, Offset - 10, Offset - 11, Offset - 12,
+ Offset - 13, Offset - 14, Offset - 15, Offset - 16);
+ unsigned short mask = 0xFFFF;
+ mask <<= (16 - Offset);
+ first = _mm512_permutexvar_ps(first_idx, first);
+ Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
+ first = _mm512_mask_blend_ps(mask, first, tmp);
+ }
+ }
+};
+template <int Offset>
+struct palign_impl<Offset, Packet8d> {
+ static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
+ if (Offset != 0) {
+ __m512i first_idx = _mm512_set_epi32(
+ 0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
+ Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
+ __m512i second_idx = _mm512_set_epi32(
+ 0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
+ Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
+ unsigned char mask = 0xFF;
+ mask <<= (8 - Offset);
+ first = _mm512_permutexvar_pd(first_idx, first);
+ Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
+ first = _mm512_mask_blend_pd(mask, first, tmp);
+ }
+ }
+};
+#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
+ EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]);
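+// Full 16x16 float transpose: unpack/shuffle pairs of rows into 4-element
+// groups, recombine the 128-bit blocks across registers with 256-bit permutes,
+// then pack the halves back into 512-bit outputs.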
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 16>& kernel) {
+ __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+ __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+ __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+ __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+ __m512 T4 = _mm512_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
+ __m512 T5 = _mm512_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
+ __m512 T6 = _mm512_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
+ __m512 T7 = _mm512_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
+ __m512 T8 = _mm512_unpacklo_ps(kernel.packet[8], kernel.packet[9]);
+ __m512 T9 = _mm512_unpackhi_ps(kernel.packet[8], kernel.packet[9]);
+ __m512 T10 = _mm512_unpacklo_ps(kernel.packet[10], kernel.packet[11]);
+ __m512 T11 = _mm512_unpackhi_ps(kernel.packet[10], kernel.packet[11]);
+ __m512 T12 = _mm512_unpacklo_ps(kernel.packet[12], kernel.packet[13]);
+ __m512 T13 = _mm512_unpackhi_ps(kernel.packet[12], kernel.packet[13]);
+ __m512 T14 = _mm512_unpacklo_ps(kernel.packet[14], kernel.packet[15]);
+ __m512 T15 = _mm512_unpackhi_ps(kernel.packet[14], kernel.packet[15]);
+ __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
+ __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
+ __m512 S4 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S5 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(3, 2, 3, 2));
+ __m512 S6 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S7 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(3, 2, 3, 2));
+ __m512 S8 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S9 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(3, 2, 3, 2));
+ __m512 S10 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S11 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(3, 2, 3, 2));
+ __m512 S12 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S13 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(3, 2, 3, 2));
+ __m512 S14 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S15 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(3, 2, 3, 2));
+ EIGEN_EXTRACT_8f_FROM_16f(S0, S0);
+ EIGEN_EXTRACT_8f_FROM_16f(S1, S1);
+ EIGEN_EXTRACT_8f_FROM_16f(S2, S2);
+ EIGEN_EXTRACT_8f_FROM_16f(S3, S3);
+ EIGEN_EXTRACT_8f_FROM_16f(S4, S4);
+ EIGEN_EXTRACT_8f_FROM_16f(S5, S5);
+ EIGEN_EXTRACT_8f_FROM_16f(S6, S6);
+ EIGEN_EXTRACT_8f_FROM_16f(S7, S7);
+ EIGEN_EXTRACT_8f_FROM_16f(S8, S8);
+ EIGEN_EXTRACT_8f_FROM_16f(S9, S9);
+ EIGEN_EXTRACT_8f_FROM_16f(S10, S10);
+ EIGEN_EXTRACT_8f_FROM_16f(S11, S11);
+ EIGEN_EXTRACT_8f_FROM_16f(S12, S12);
+ EIGEN_EXTRACT_8f_FROM_16f(S13, S13);
+ EIGEN_EXTRACT_8f_FROM_16f(S14, S14);
+ EIGEN_EXTRACT_8f_FROM_16f(S15, S15);
+ PacketBlock<Packet8f, 32> tmp;
+ tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S4_0, 0x20);
+ tmp.packet[1] = _mm256_permute2f128_ps(S1_0, S5_0, 0x20);
+ tmp.packet[2] = _mm256_permute2f128_ps(S2_0, S6_0, 0x20);
+ tmp.packet[3] = _mm256_permute2f128_ps(S3_0, S7_0, 0x20);
+ tmp.packet[4] = _mm256_permute2f128_ps(S0_0, S4_0, 0x31);
+ tmp.packet[5] = _mm256_permute2f128_ps(S1_0, S5_0, 0x31);
+ tmp.packet[6] = _mm256_permute2f128_ps(S2_0, S6_0, 0x31);
+ tmp.packet[7] = _mm256_permute2f128_ps(S3_0, S7_0, 0x31);
+ tmp.packet[8] = _mm256_permute2f128_ps(S0_1, S4_1, 0x20);
+ tmp.packet[9] = _mm256_permute2f128_ps(S1_1, S5_1, 0x20);
+ tmp.packet[10] = _mm256_permute2f128_ps(S2_1, S6_1, 0x20);
+ tmp.packet[11] = _mm256_permute2f128_ps(S3_1, S7_1, 0x20);
+ tmp.packet[12] = _mm256_permute2f128_ps(S0_1, S4_1, 0x31);
+ tmp.packet[13] = _mm256_permute2f128_ps(S1_1, S5_1, 0x31);
+ tmp.packet[14] = _mm256_permute2f128_ps(S2_1, S6_1, 0x31);
+ tmp.packet[15] = _mm256_permute2f128_ps(S3_1, S7_1, 0x31);
+ tmp.packet[16] = _mm256_permute2f128_ps(S8_0, S12_0, 0x20);
+ tmp.packet[17] = _mm256_permute2f128_ps(S9_0, S13_0, 0x20);
+ tmp.packet[18] = _mm256_permute2f128_ps(S10_0, S14_0, 0x20);
+ tmp.packet[19] = _mm256_permute2f128_ps(S11_0, S15_0, 0x20);
+ tmp.packet[20] = _mm256_permute2f128_ps(S8_0, S12_0, 0x31);
+ tmp.packet[21] = _mm256_permute2f128_ps(S9_0, S13_0, 0x31);
+ tmp.packet[22] = _mm256_permute2f128_ps(S10_0, S14_0, 0x31);
+ tmp.packet[23] = _mm256_permute2f128_ps(S11_0, S15_0, 0x31);
+ tmp.packet[24] = _mm256_permute2f128_ps(S8_1, S12_1, 0x20);
+ tmp.packet[25] = _mm256_permute2f128_ps(S9_1, S13_1, 0x20);
+ tmp.packet[26] = _mm256_permute2f128_ps(S10_1, S14_1, 0x20);
+ tmp.packet[27] = _mm256_permute2f128_ps(S11_1, S15_1, 0x20);
+ tmp.packet[28] = _mm256_permute2f128_ps(S8_1, S12_1, 0x31);
+ tmp.packet[29] = _mm256_permute2f128_ps(S9_1, S13_1, 0x31);
+ tmp.packet[30] = _mm256_permute2f128_ps(S10_1, S14_1, 0x31);
+ tmp.packet[31] = _mm256_permute2f128_ps(S11_1, S15_1, 0x31);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 0, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 1, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 2, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 3, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 4, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 5, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 6, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 7, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 8, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 9, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 10, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 11, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 12, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 13, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 14, 16);
+ PACK_OUTPUT(kernel.packet, tmp.packet, 15, 16);
+}
+#define PACK_OUTPUT_2(OUTPUT, INPUT, INDEX, STRIDE) \
+ EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[2 * INDEX], \
+ INPUT[2 * INDEX + STRIDE]);
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 4>& kernel) {
+ __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+ __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+ __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+ __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+ __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
+ __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
+ __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
+ EIGEN_EXTRACT_8f_FROM_16f(S0, S0);
+ EIGEN_EXTRACT_8f_FROM_16f(S1, S1);
+ EIGEN_EXTRACT_8f_FROM_16f(S2, S2);
+ EIGEN_EXTRACT_8f_FROM_16f(S3, S3);
+ PacketBlock<Packet8f, 8> tmp;
+ tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S1_0, 0x20);
+ tmp.packet[1] = _mm256_permute2f128_ps(S2_0, S3_0, 0x20);
+ tmp.packet[2] = _mm256_permute2f128_ps(S0_0, S1_0, 0x31);
+ tmp.packet[3] = _mm256_permute2f128_ps(S2_0, S3_0, 0x31);
+ tmp.packet[4] = _mm256_permute2f128_ps(S0_1, S1_1, 0x20);
+ tmp.packet[5] = _mm256_permute2f128_ps(S2_1, S3_1, 0x20);
+ tmp.packet[6] = _mm256_permute2f128_ps(S0_1, S1_1, 0x31);
+ tmp.packet[7] = _mm256_permute2f128_ps(S2_1, S3_1, 0x31);
+ PACK_OUTPUT_2(kernel.packet, tmp.packet, 0, 1);
+ PACK_OUTPUT_2(kernel.packet, tmp.packet, 1, 1);
+ PACK_OUTPUT_2(kernel.packet, tmp.packet, 2, 1);
+ PACK_OUTPUT_2(kernel.packet, tmp.packet, 3, 1);
+}
+#define PACK_OUTPUT_SQ_D(OUTPUT, INPUT, INDEX, STRIDE) \
+ OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX], 0); \
+ OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX + STRIDE], 1);
+#define PACK_OUTPUT_D(OUTPUT, INPUT, INDEX, STRIDE) \
+ OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \
+ OUTPUT[INDEX] = \
+ _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 1);
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 4>& kernel) {
+ __m512d T0 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
+ __m512d T1 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xff);
+ __m512d T2 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
+ __m512d T3 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0xff);
+ PacketBlock<Packet4d, 8> tmp;
+ tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
+ _mm512_extractf64x4_pd(T2, 0), 0x20);
+ tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
+ _mm512_extractf64x4_pd(T3, 0), 0x20);
+ tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
+ _mm512_extractf64x4_pd(T2, 0), 0x31);
+ tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
+ _mm512_extractf64x4_pd(T3, 0), 0x31);
+ tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
+ _mm512_extractf64x4_pd(T2, 1), 0x20);
+ tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
+ _mm512_extractf64x4_pd(T3, 1), 0x20);
+ tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
+ _mm512_extractf64x4_pd(T2, 1), 0x31);
+ tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
+ _mm512_extractf64x4_pd(T3, 1), 0x31);
+ PACK_OUTPUT_D(kernel.packet, tmp.packet, 0, 1);
+ PACK_OUTPUT_D(kernel.packet, tmp.packet, 1, 1);
+ PACK_OUTPUT_D(kernel.packet, tmp.packet, 2, 1);
+ PACK_OUTPUT_D(kernel.packet, tmp.packet, 3, 1);
+}
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 8>& kernel) {
+ __m512d T0 = _mm512_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+ __m512d T1 = _mm512_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+ __m512d T2 = _mm512_unpacklo_pd(kernel.packet[2], kernel.packet[3]);
+ __m512d T3 = _mm512_unpackhi_pd(kernel.packet[2], kernel.packet[3]);
+ __m512d T4 = _mm512_unpacklo_pd(kernel.packet[4], kernel.packet[5]);
+ __m512d T5 = _mm512_unpackhi_pd(kernel.packet[4], kernel.packet[5]);
+ __m512d T6 = _mm512_unpacklo_pd(kernel.packet[6], kernel.packet[7]);
+ __m512d T7 = _mm512_unpackhi_pd(kernel.packet[6], kernel.packet[7]);
+ PacketBlock<Packet4d, 16> tmp;
+ tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
+ _mm512_extractf64x4_pd(T2, 0), 0x20);
+ tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
+ _mm512_extractf64x4_pd(T3, 0), 0x20);
+ tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
+ _mm512_extractf64x4_pd(T2, 0), 0x31);
+ tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
+ _mm512_extractf64x4_pd(T3, 0), 0x31);
+ tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
+ _mm512_extractf64x4_pd(T2, 1), 0x20);
+ tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
+ _mm512_extractf64x4_pd(T3, 1), 0x20);
+ tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
+ _mm512_extractf64x4_pd(T2, 1), 0x31);
+ tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
+ _mm512_extractf64x4_pd(T3, 1), 0x31);
+ tmp.packet[8] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),
+ _mm512_extractf64x4_pd(T6, 0), 0x20);
+ tmp.packet[9] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),
+ _mm512_extractf64x4_pd(T7, 0), 0x20);
+ tmp.packet[10] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),
+ _mm512_extractf64x4_pd(T6, 0), 0x31);
+ tmp.packet[11] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),
+ _mm512_extractf64x4_pd(T7, 0), 0x31);
+ tmp.packet[12] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),
+ _mm512_extractf64x4_pd(T6, 1), 0x20);
+ tmp.packet[13] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),
+ _mm512_extractf64x4_pd(T7, 1), 0x20);
+ tmp.packet[14] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),
+ _mm512_extractf64x4_pd(T6, 1), 0x31);
+ tmp.packet[15] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),
+ _mm512_extractf64x4_pd(T7, 1), 0x31);
+ PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 0, 8);
+ PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 1, 8);
+ PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 2, 8);
+ PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 3, 8);
+ PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 4, 8);
+ PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 5, 8);
+ PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 6, 8);
+ PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 7, 8);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& ,
+ const Packet16f& ,
+ const Packet16f& ) {
+ assert(false && "To be implemented");
+ return Packet16f();
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ,
+ const Packet8d& ,
+ const Packet8d& ) {
+ assert(false && "To be implemented");
+ return Packet8d();
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX512/PacketMath.h"
+// #include "src/Core/arch/AVX512/MathFunctions.h"
+#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
+#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
+namespace Eigen {
+namespace internal {
+#if EIGEN_GNUC_AT_LEAST(5, 3)
+#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
+ const Packet16f p16f_##NAME = pset1<Packet16f>(X)
+#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
+ const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
+#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
+ const Packet8d p8d_##NAME = pset1<Packet8d>(X)
+#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
+ const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
+#if defined(EIGEN_VECTORIZE_AVX512DQ)
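+// Vectorized natural logarithm following the classic Cephes recipe: split
+// x = 2^e * m with the mantissa m brought into [sqrt(1/2), sqrt(2)), evaluate
+// a polynomial in m - 1, and add e * ln(2) as the two constants q1 + q2 for
+// extra accuracy. Zero inputs map to -inf, negative/NaN inputs to NaN.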
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+plog<Packet16f>(const Packet16f& _x) {
+ Packet16f x = _x;
+ _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
+ _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
+ __mmask16 invalid_mask =
+ _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
+ __mmask16 iszero_mask =
+ _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_UQ);
+ x = pmax(x, p16f_min_norm_pos);
+ Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
+ Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
+ x = _mm512_and_ps(x, p16f_inv_mant_mask);
+ x = _mm512_or_ps(x, p16f_half);
+ __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
+  // Note: _mm512_mask_blend_ps returns its *second* value operand in lanes
+  // where the mask bit is set, hence the operand order below.
+  Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
+  x = psub(x, p16f_1);
+  e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
+ x = padd(x, tmp);
+ Packet16f x2 = pmul(x, x);
+ Packet16f x3 = pmul(x2, x);
+ Packet16f y, y1, y2;
+ y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
+ y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
+ y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
+ y = pmadd(y, x, p16f_cephes_log_p2);
+ y1 = pmadd(y1, x, p16f_cephes_log_p5);
+ y2 = pmadd(y2, x, p16f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+ y1 = pmul(e, p16f_cephes_log_q1);
+ tmp = pmul(x2, p16f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p16f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+  return _mm512_mask_blend_ps(
+      iszero_mask, _mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
+      p16f_minus_inf);
+}
+#endif
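+// Vectorized exponential, Cephes style: m = round(x / ln(2)), r = x - m*ln(2),
+// exp(r) from a short polynomial, and the 2^m scaling applied by constructing
+// the IEEE exponent field directly (biased integer shifted into bits 30..23).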
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+pexp<Packet16f>(const Packet16f& _x) {
+ _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
+ _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
+ _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
+ Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
+ Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
+ _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
+ Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
+ Packet16f r2 = pmul(r, r);
+ Packet16f y = p16f_cephes_exp_p0;
+ y = pmadd(y, r, p16f_cephes_exp_p1);
+ y = pmadd(y, r, p16f_cephes_exp_p2);
+ y = pmadd(y, r, p16f_cephes_exp_p3);
+ y = pmadd(y, r, p16f_cephes_exp_p4);
+ y = pmadd(y, r, p16f_cephes_exp_p5);
+ y = pmadd(y, r2, r);
+ y = padd(y, p16f_1);
+ Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
+ emm0 = _mm512_slli_epi32(emm0, 23);
+ return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
+}
+#if EIGEN_FAST_MATH
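+// Fast sqrt(a) = a * rsqrt(a): start from the rsqrt14 hardware estimate and
+// refine with Newton-Raphson, x <- x * (1.5 - 0.5*a*x^2); inputs below the
+// smallest normalized value are flushed to zero. The double variant applies a
+// second refinement step. As above, the mask_blend intrinsics return their
+// second value operand in mask-set lanes.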
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+psqrt<Packet16f>(const Packet16f& _x) {
+ _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
+ Packet16f neg_half = pmul(_x, p16f_minus_half);
+ __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
+  Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(),
+                                     _mm512_rsqrt14_ps(_x));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
+ return pmul(_x, x);
+}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+psqrt<Packet8d>(const Packet8d& _x) {
+ _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
+ _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
+ _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
+ Packet8d neg_half = pmul(_x, p8d_minus_half);
+ __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
+  Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(),
+                                    _mm512_rsqrt14_pd(_x));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+ return pmul(_x, x);
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
+ return _mm512_sqrt_ps(x);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
+ return _mm512_sqrt_pd(x);
+}
+#endif
+#if EIGEN_FAST_MATH
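+// Fast reciprocal sqrt: same rsqrt14 estimate plus one Newton-Raphson step
+// (two for double); lanes below the smallest normalized value are patched
+// afterwards to +inf (zero and positive underflow) or NaN (negative inputs).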
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+prsqrt<Packet16f>(const Packet16f& _x) {
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
+ Packet16f neg_half = pmul(_x, p16f_minus_half);
+ __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
+  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x),
+                                     _mm512_setzero_ps());
+ __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
+  Packet16f infs_and_nans = _mm512_mask_blend_ps(
+      neg_mask,
+      _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf),
+      p16f_nan);
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
+  return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
+}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+prsqrt<Packet8d>(const Packet8d& _x) {
+ _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
+ _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
+ _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
+ _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
+ _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
+ Packet8d neg_half = pmul(_x, p8d_minus_half);
+ __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
+  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x),
+                                    _mm512_setzero_pd());
+ __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
+  Packet8d infs_and_nans = _mm512_mask_blend_pd(
+      neg_mask,
+      _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf),
+      p8d_nan);
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+  return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
+ return _mm512_rsqrt28_ps(x);
+}
+#endif
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/AVX512/MathFunctions.h"
+#elif defined EIGEN_VECTORIZE_AVX
+// #include "src/Core/arch/SSE/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_SSE_H
+#define EIGEN_PACKET_MATH_SSE_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
+#endif
+#endif
+#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
+template<typename T>
+struct eigen_packet_wrapper
+{
+ EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
+ EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
+ m_val = v;
+ return *this;
+ }
+ T m_val;
+};
+typedef eigen_packet_wrapper<__m128> Packet4f;
+typedef eigen_packet_wrapper<__m128i> Packet4i;
+typedef eigen_packet_wrapper<__m128d> Packet2d;
+#else
+typedef __m128 Packet4f;
+typedef __m128i Packet4i;
+typedef __m128d Packet2d;
+#endif
+template<> struct is_arithmetic<__m128> { enum { value = true }; };
+template<> struct is_arithmetic<__m128i> { enum { value = true }; };
+template<> struct is_arithmetic<__m128d> { enum { value = true }; };
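+// Swizzle macros: the immediate ((s)<<6|(r)<<4|(q)<<2|(p)) selects which source
+// element lands in each destination slot, the same encoding as _MM_SHUFFLE.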
+#define vec4f_swizzle1(v,p,q,r,s) \
+ (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
+#define vec4i_swizzle1(v,p,q,r,s) \
+ (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec2d_swizzle1(v,p,q) \
+ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
+#define vec4f_swizzle2(a,b,p,q,r,s) \
+ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec4i_swizzle2(a,b,p,q,r,s) \
+ (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ const Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
+ HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ ,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ ,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
+ };
+};
+#endif
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasBlend = 1
+ };
+};
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+#if EIGEN_COMP_MSVC==1500
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
+#endif
+#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
+template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
+ return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
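+// Negation: XOR the IEEE sign bit for floating point, subtract from zero for integers.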
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+ return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
+{
+ return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
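+// SSE2 lacks a packed 32-bit integer multiply; the fallback uses two widening _mm_mul_epu32
+// (which multiply the even lanes only) on the original and lane-swapped inputs, then
+// re-interleaves the low halves of the products.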
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_mullo_epi32(a,b);
+#else
+ return vec4i_swizzle1(
+ vec4i_swizzle2(
+ _mm_mul_epu32(a,b),
+ _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
+ vec4i_swizzle1(b,1,0,3,2)),
+ 0,2,0,2),
+ 0,2,1,3);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
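+// SSE2 also lacks packed integer min/max; the fallback selects lanes through a compare mask.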
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
+ Packet4i mask = _mm_cmplt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
+ Packet4i mask = _mm_cmpgt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+#if EIGEN_COMP_MSVC
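+ // MSVC 2010 (_MSC_VER == 1600) reportedly miscompiles _mm_loadu_ps in some cases, so the
+ // unaligned float load is assembled from two 64-bit half loads there.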
+ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ #if (EIGEN_COMP_MSVC==1600)
+ __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
+ res = _mm_loadh_pi(res, (const __m64*)(from+2));
+ return res;
+ #else
+ return _mm_loadu_ps(from);
+ #endif
+ }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_ps(from);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_pd(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+}
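+// ploaddup loads size/2 scalars and duplicates each into a pair of adjacent lanes.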
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{ return pset1<Packet2d>(from[0]); }
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i tmp;
+ tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
+ return vec4i_swizzle1(tmp, 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
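+// Strided gather/scatter: there is no SSE gather instruction, so lanes are moved one at a time.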
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ return _mm_set_pd(from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ to[stride*0] = _mm_cvtss_f32(from);
+ to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+ to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+ to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ to[stride*0] = _mm_cvtsd_f64(from);
+ to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ to[stride*0] = _mm_cvtsi128_si32(from);
+ to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+ to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+ to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
+{
+ Packet4f pa = _mm_set_ss(a);
+ pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
+{
+ Packet2d pa = _mm_set_sd(a);
+ pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
+}
+#ifndef EIGEN_VECTORIZE_AVX
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif EIGEN_COMP_MSVC_STRICT
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+ #ifdef EIGEN_VECTORIZE_SSSE3
+ return _mm_abs_epi32(a);
+ #else
+ Packet4i aux = _mm_srai_epi32(a,31);
+ return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+ #endif
+}
+#ifndef __AVX__
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec4f_swizzle1(a3, 0,0,0,0);
+ a1 = vec4f_swizzle1(a3, 1,1,1,1);
+ a2 = vec4f_swizzle1(a3, 2,2,2,2);
+ a3 = vec4f_swizzle1(a3, 3,3,3,3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+#ifdef EIGEN_VECTORIZE_SSE3
+ a0 = _mm_loaddup_pd(a+0);
+ a1 = _mm_loaddup_pd(a+1);
+ a2 = _mm_loaddup_pd(a+2);
+ a3 = _mm_loaddup_pd(a+3);
+#else
+ a1 = pload<Packet2d>(a);
+ a0 = vec2d_swizzle1(a1, 0,0);
+ a1 = vec2d_swizzle1(a1, 1,1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec2d_swizzle1(a3, 0,0);
+ a3 = vec2d_swizzle1(a3, 1,1);
+#endif
+}
+#endif
+EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
+{
+ vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
+ vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
+ vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
+ vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
+}
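+// preduxp reduces four packets at once: lane i of the result holds the sum of the lanes of
+// vecs[i]. SSE3 maps this directly onto horizontal adds.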
+#ifdef EIGEN_VECTORIZE_SSE3
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_hadd_pd(vecs[0], vecs[1]);
+}
+#else
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ Packet4f tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
+ tmp0 = _mm_add_ps(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
+ tmp1 = _mm_add_ps(tmp1, tmp2);
+ tmp2 = _mm_movehl_ps(tmp1, tmp0);
+ tmp0 = _mm_movelh_ps(tmp0, tmp1);
+ return _mm_add_ps(tmp0, tmp2);
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
+}
+#endif
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+}
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp0 = _mm_hadd_epi32(a,a);
+ return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+ return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+ tmp0 = _mm_add_epi32(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+ tmp1 = _mm_add_epi32(tmp1, tmp2);
+ tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
+ tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
+ return _mm_add_epi32(tmp0, tmp2);
+}
+#endif
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+  return (aux[0] * aux[1]) * (aux[2] * aux[3]);
+}
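+// Min/max reductions fold the packet in halves; the SSE2 integer fallback spills to the stack
+// and compares scalars.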
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+ return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+ int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+ return aux0<aux2 ? aux0 : aux2;
+#endif
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+ return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+ int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+ return aux0>aux2 ? aux0 : aux2;
+#endif
+}
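+// palign_impl shifts the concatenation [first, second] left by Offset scalars into 'first'.
+// SSSE3 uses _mm_alignr_epi8; the SSE2 fallback rebuilds the result from moves and shuffles.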
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset!=0)
+ first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset!=0)
+ first = _mm_alignr_epi8(second,first, Offset*4);
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset==1)
+ first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
+ }
+};
+#else
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_move_ss(first,second);
+ first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
+ }
+ else if (Offset==2)
+ {
+ first = _mm_movehl_ps(first,first);
+ first = _mm_movelh_ps(first,second);
+ }
+ else if (Offset==3)
+ {
+ first = _mm_move_ss(first,second);
+ first = _mm_shuffle_ps(first,second,0x93);
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ first = _mm_shuffle_epi32(first,0x39);
+ }
+ else if (Offset==2)
+ {
+ first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
+ first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ }
+ else if (Offset==3)
+ {
+ first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
+ first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
+ }
+ }
+};
+#endif
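+// In-register transpose of a packet block: 4x4 for floats/ints, 2x2 for doubles.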
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[1] = tmp;
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
+ __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
+ __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
+ __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
+ kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
+ kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
+ kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
+ kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
+}
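+// pblend keeps thenPacket lanes where the selector is non-zero. The mask computed below marks
+// the 'false' lanes, so blendv (or the bitwise fallback) substitutes elsePacket there.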
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128i false_mask = _mm_cmpeq_epi32(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+ const __m128 zero = _mm_setzero_ps();
+ const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128 false_mask = _mm_cmpeq_ps(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ const __m128d zero = _mm_setzero_pd();
+ const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
+ __m128d false_mask = _mm_cmpeq_pd(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),1);
+#else
+ return _mm_move_ss(a, _mm_load_ss(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),1);
+#else
+ return _mm_move_sd(a, _mm_load_sd(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+ return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+ return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
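+// With FMA available, the scalar pmadd overloads also map onto fused multiply-adds.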
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
+ return ::fmaf(a,b,c);
+}
+template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
+ return ::fma(a,b,c);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/PacketMath.h"
+// #include "src/Core/arch/SSE/Complex.h"
+#ifndef EIGEN_COMPLEX_SSE_H
+#define EIGEN_COMPLEX_SSE_H
+namespace Eigen {
+namespace internal {
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
+ __m128 v;
+};
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0,
+ HasBlend = 1
+ };
+};
+#endif
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_xor_ps(a.v,mask));
+}
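+// Complex product: (a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re). The SSE3 path expresses it
+// with moveldup/movehdup and addsub; the SSE2 path flips signs with an XOR mask instead.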
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
+ _mm_mul_ps(_mm_movehdup_ps(a.v),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
+ return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+ _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ Packet2cf res;
+#if EIGEN_GNUC_AT_MOST(4,2)
+ res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
+#elif EIGEN_GNUC_AT_LEAST(4,6)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wuninitialized"
+ res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+ #pragma GCC diagnostic pop
+#else
+ res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+#endif
+ return Packet2cf(_mm_movelh_ps(res.v,res.v));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+ return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+ to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
+ to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
+}
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ #if EIGEN_GNUC_AT_MOST(4,3)
+ EIGEN_ALIGN16 std::complex<float> res[2];
+ _mm_store_ps((float*)res, a.v);
+ return res[0];
+ #else
+ std::complex<float> res;
+ _mm_storel_pi((__m64*)&res, a.v);
+ return res;
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
+}
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+ first.v = _mm_movehl_ps(first.v, first.v);
+ first.v = _mm_movelh_ps(first.v, second.v);
+ }
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+ _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+ _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+ _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
+ { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
+};
+template<> struct conj_helper<Packet2cf, Packet4f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
+ { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
+};
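+// Complex division: multiply a by conj(b), then divide by |b|^2 broadcast to both lanes.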
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ __m128 s = _mm_mul_ps(b.v,b.v);
+ return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
+}
+EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
+{
+ return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
+}
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
+ __m128d v;
+};
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+#endif
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
+{
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_xor_pd(a.v,mask));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ EIGEN_ALIGN16 double res[2];
+ _mm_store_pd(res, a.v);
+ return std::complex<double>(res[0],res[1]);
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet2d, Packet1cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
+};
+template<> struct conj_helper<Packet1cd, Packet2d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
+};
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ __m128d s = _mm_mul_pd(b.v,b.v);
+ return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
+}
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+ __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
+ __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
+ __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
+ kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
+ kernel.packet[1].v = tmp;
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
+ return Packet2cf(_mm_castpd_ps(result));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/Complex.h"
+// #include "src/Core/arch/SSE/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
+#define EIGEN_MATH_FUNCTIONS_SSE_H
+namespace Eigen {
+namespace internal {
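+// The vectorized transcendentals below are adapted from the Cephes library, evaluating
+// polynomial or rational approximations entirely in SSE registers.
+// plog: split x into mantissa and exponent, evaluate a degree-8 polynomial on the reduced
+// mantissa, and recombine as log(m) + e*log(2); zero maps to -inf and negative input to NaN.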
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+ Packet4i emm0;
+ Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps());
+ Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
+ x = pmax(x, p4f_min_norm_pos);
+ emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
+ x = _mm_and_ps(x, p4f_inv_mant_mask);
+ x = _mm_or_ps(x, p4f_half);
+ emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
+ Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
+ Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
+ Packet4f tmp = pand(x, mask);
+ x = psub(x, p4f_1);
+ e = psub(e, pand(p4f_1, mask));
+ x = padd(x, tmp);
+ Packet4f x2 = pmul(x,x);
+ Packet4f x3 = pmul(x2,x);
+ Packet4f y, y1, y2;
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+ y = pmadd(y , x, p4f_cephes_log_p2);
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+ y1 = pmul(e, p4f_cephes_log_q1);
+ tmp = pmul(x2, p4f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p4f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+ return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+ _mm_and_ps(iszero_mask, p4f_minus_inf));
+}
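+// pexp (float): range-reduce with n = round(x*log2(e)), evaluate a short polynomial for
+// exp(r) on the remainder, then scale by 2^n built directly in the exponent bits.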
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+ Packet4f tmp, fx;
+ Packet4i emm0;
+ x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+ fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ fx = _mm_floor_ps(fx);
+#else
+ emm0 = _mm_cvttps_epi32(fx);
+ tmp = _mm_cvtepi32_ps(emm0);
+ Packet4f mask = _mm_cmpgt_ps(tmp, fx);
+ mask = _mm_and_ps(mask, p4f_1);
+ fx = psub(tmp, mask);
+#endif
+ tmp = pmul(fx, p4f_cephes_exp_C1);
+ Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ z = pmul(x,x);
+ Packet4f y = p4f_cephes_exp_p0;
+ y = pmadd(y, x, p4f_cephes_exp_p1);
+ y = pmadd(y, x, p4f_cephes_exp_p2);
+ y = pmadd(y, x, p4f_cephes_exp_p3);
+ y = pmadd(y, x, p4f_cephes_exp_p4);
+ y = pmadd(y, x, p4f_cephes_exp_p5);
+ y = pmadd(y, z, x);
+ y = padd(y, p4f_1);
+ emm0 = _mm_cvttps_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_0x7f);
+ emm0 = _mm_slli_epi32(emm0, 23);
+ return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
+}
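+// pexp (double): same reduction, but with a Pade-style rational approximant px/(qx-px).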
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+ _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+ _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+ _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+ static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
+ Packet2d tmp, fx;
+ Packet4i emm0;
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ fx = _mm_floor_pd(fx);
+#else
+ emm0 = _mm_cvttpd_epi32(fx);
+ tmp = _mm_cvtepi32_pd(emm0);
+ Packet2d mask = _mm_cmpgt_pd(tmp, fx);
+ mask = _mm_and_pd(mask, p2d_1);
+ fx = psub(tmp, mask);
+#endif
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ Packet2d x2 = pmul(x,x);
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+ emm0 = _mm_cvttpd_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_1023_0);
+ emm0 = _mm_slli_epi32(emm0, 20);
+ emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
+ return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
+}
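+// psin/pcos: Cephes-style argument reduction modulo pi/4; the quadrant bits select between the
+// sine and cosine polynomials and restore the final sign.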
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psin<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
+ Packet4f xmm1, xmm2, xmm3, sign_bit, y;
+ Packet4i emm0, emm2;
+ sign_bit = x;
+ x = pabs(x);
+ sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
+ y = pmul(x, p4f_cephes_FOPI);
+ emm2 = _mm_cvttps_epi32(y);
+ emm2 = _mm_add_epi32(emm2, p4i_1);
+ emm2 = _mm_and_si128(emm2, p4i_not1);
+ y = _mm_cvtepi32_ps(emm2);
+ emm0 = _mm_and_si128(emm2, p4i_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ emm2 = _mm_and_si128(emm2, p4i_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
+ Packet4f poly_mask = _mm_castsi128_ps(emm2);
+ sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
+ x = padd(x, xmm1);
+ x = padd(x, xmm2);
+ x = padd(x, xmm3);
+ y = p4f_coscof_p0;
+ Packet4f z = _mm_mul_ps(x,x);
+ y = pmadd(y, z, p4f_coscof_p1);
+ y = pmadd(y, z, p4f_coscof_p2);
+ y = pmul(y, z);
+ y = pmul(y, z);
+ Packet4f tmp = pmul(z, p4f_half);
+ y = psub(y, tmp);
+ y = padd(y, p4f_1);
+ Packet4f y2 = p4f_sincof_p0;
+ y2 = pmadd(y2, z, p4f_sincof_p1);
+ y2 = pmadd(y2, z, p4f_sincof_p2);
+ y2 = pmul(y2, z);
+ y2 = pmul(y2, x);
+ y2 = padd(y2, x);
+ y2 = _mm_and_ps(poly_mask, y2);
+ y = _mm_andnot_ps(poly_mask, y);
+ y = _mm_or_ps(y,y2);
+ return _mm_xor_ps(y, sign_bit);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pcos<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
+ Packet4f xmm1, xmm2, xmm3, y;
+ Packet4i emm0, emm2;
+ x = pabs(x);
+ y = pmul(x, p4f_cephes_FOPI);
+ emm2 = _mm_cvttps_epi32(y);
+ emm2 = _mm_add_epi32(emm2, p4i_1);
+ emm2 = _mm_and_si128(emm2, p4i_not1);
+ y = _mm_cvtepi32_ps(emm2);
+ emm2 = _mm_sub_epi32(emm2, p4i_2);
+ emm0 = _mm_andnot_si128(emm2, p4i_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ emm2 = _mm_and_si128(emm2, p4i_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ Packet4f sign_bit = _mm_castsi128_ps(emm0);
+ Packet4f poly_mask = _mm_castsi128_ps(emm2);
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
+ x = padd(x, xmm1);
+ x = padd(x, xmm2);
+ x = padd(x, xmm3);
+ y = p4f_coscof_p0;
+ Packet4f z = pmul(x,x);
+ y = pmadd(y,z,p4f_coscof_p1);
+ y = pmadd(y,z,p4f_coscof_p2);
+ y = pmul(y, z);
+ y = pmul(y, z);
+ Packet4f tmp = _mm_mul_ps(z, p4f_half);
+ y = psub(y, tmp);
+ y = padd(y, p4f_1);
+ Packet4f y2 = p4f_sincof_p0;
+ y2 = pmadd(y2, z, p4f_sincof_p1);
+ y2 = pmadd(y2, z, p4f_sincof_p2);
+ y2 = pmul(y2, z);
+ y2 = pmadd(y2, x, x);
+ y2 = _mm_and_ps(poly_mask, y2);
+ y = _mm_andnot_ps(poly_mask, y);
+ y = _mm_or_ps(y,y2);
+ return _mm_xor_ps(y, sign_bit);
+}
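+// Fast sqrt: an rsqrt estimate refined by one Newton-Raphson step, multiplied by x. Inputs in
+// [0, FLT_MIN) are flushed to zero, where the estimate is unreliable.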
+#if EIGEN_FAST_MATH
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& _x)
+{
+ Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
+ Packet4f denormal_mask = _mm_and_ps(
+ _mm_cmpge_ps(_x, _mm_setzero_ps()),
+ _mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
+ Packet4f x = _mm_rsqrt_ps(_x);
+ x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
+ return _mm_andnot_ps(denormal_mask, pmul(_x,x));
+}
+#else
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+#endif
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
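+// Fast reciprocal sqrt: the same rsqrt estimate plus one Newton-Raphson
+// refinement. Lanes below FLT_MIN are special-cased to match 1/sqrt(x):
+// negative inputs produce NaN, (near-)zero inputs produce +inf.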
+#if EIGEN_FAST_MATH
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
+ Packet4f neg_half = pmul(_x, p4f_minus_half);
+ Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
+ Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
+ Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
+ Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
+ Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
+ _mm_and_ps(zero_mask, p4f_inf));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
+ return _mm_or_ps(x, infs_and_nans);
+}
+#else
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+ return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
+}
+#endif
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+ return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
+}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
+ptanh<Packet4f>(const Packet4f& x) {
+ return internal::generic_fast_tanh_float(x);
+}
+}
+namespace numext {
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x)
+{
+ return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x)
+{
+#if EIGEN_COMP_GNUC_STRICT
+ return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
+#else
+ return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
+#endif
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/MathFunctions.h"
+// #include "src/Core/arch/AVX/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_AVX_H
+#define EIGEN_PACKET_MATH_AVX_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
+typedef __m256 Packet8f;
+typedef __m256i Packet8i;
+typedef __m256d Packet4d;
+template<> struct is_arithmetic<__m256> { enum { value = true }; };
+template<> struct is_arithmetic<__m256i> { enum { value = true }; };
+template<> struct is_arithmetic<__m256d> { enum { value = true }; };
+#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
+ const Packet8f p8f_##NAME = pset1<Packet8f>(X)
+#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
+ const Packet4d p4d_##NAME = pset1<Packet4d>(X)
+#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
+ const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
+#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
+ const Packet8i p8i_##NAME = pset1<Packet8i>(X)
+#ifndef EIGEN_VECTORIZE_AVX512
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet8f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=8,
+ HasHalfPacket = 1,
+ HasDiv = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = 0,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
+ HasBlend = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet4d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 1,
+ HasDiv = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasBlend = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+ };
+};
+#endif
+template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
+template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
+template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
+template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
+template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
+template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
+template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
+{
+ return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
+{
+ return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& , const Packet8i& )
+{
+ eigen_assert(false && "packet integer division is not supported by AVX");
+ return pset1<Packet8i>(0);
+}
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
+ Packet8f res = c;
+ __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_ps(a,b,c);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
+ Packet4d res = c;
+ __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+ return res;
+#else
+ return _mm256_fmadd_pd(a,b,c);
+#endif
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
+template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
+template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
+{
+ Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
+ tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
+ return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
+}
+template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
+{
+ Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
+ return _mm256_permute_pd(tmp, 3<<2);
+}
+template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
+{
+ Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
+ return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
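+// Strided gathers and scatters are emulated with scalar element accesses
+// (_mm256_set_ps/_mm256_set_pd on the load side, lane extraction on the
+// store side); no hardware gather instruction is used here.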
+template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
+{
+ return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
+ from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
+{
+ return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
+{
+ __m128 low = _mm256_extractf128_ps(from, 0);
+ to[stride*0] = _mm_cvtss_f32(low);
+ to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
+ to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
+ to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
+ __m128 high = _mm256_extractf128_ps(from, 1);
+ to[stride*4] = _mm_cvtss_f32(high);
+ to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
+ to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
+ to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
+{
+ __m128d low = _mm256_extractf128_pd(from, 0);
+ to[stride*0] = _mm_cvtsd_f64(low);
+ to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
+ __m128d high = _mm256_extractf128_pd(from, 1);
+ to[stride*2] = _mm_cvtsd_f64(high);
+ to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
+{
+ Packet8f pa = pset1<Packet8f>(a);
+ pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
+{
+ Packet4d pa = pset1<Packet4d>(a);
+ pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
+{
+ Packet8i pa = pset1<Packet8i>(a);
+ pstore(to, pa);
+}
+#ifndef EIGEN_VECTORIZE_AVX512
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
+ return _mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
+template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
+ return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
+}
+template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
+ return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
+}
+template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
+{
+ __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
+ return _mm256_permute2f128_ps(tmp, tmp, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
+{
+ __m256d tmp = _mm256_shuffle_pd(a,a,5);
+ return _mm256_permute2f128_pd(tmp, tmp, 1);
+ // Equivalent alternative: swap the 128-bit halves first, then permute
+ // within each half:
+ //   __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
+ //   return _mm256_permute_pd(swap_halves,5);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
+{
+ const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm256_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
+{
+ const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm256_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
+{
+ __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
+ __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
+ __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
+ __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
+ __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+ __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+ __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+ __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+ __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+ __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+ __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+ __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+ __m256 sum1 = _mm256_add_ps(perm1, hsum5);
+ __m256 sum2 = _mm256_add_ps(perm2, hsum6);
+ __m256 sum3 = _mm256_add_ps(perm3, hsum7);
+ __m256 sum4 = _mm256_add_ps(perm4, hsum8);
+ __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+ __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+ __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
+ return final;
+}
+template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
+{
+ Packet4d tmp0, tmp1;
+ tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
+ tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+ tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
+ tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+ return _mm256_blend_pd(tmp0, tmp1, 0xC);
+}
+template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
+{
+ return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
+{
+ return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
+}
+template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
+{
+ return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
+}
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp;
+ tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp;
+ tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
+{
+ Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
+ tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+ return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
+{
+ Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
+ return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
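+// palign shifts the concatenation [first, second] left by Offset scalars,
+// i.e. first becomes {first[Offset..7], second[0..Offset-1]}. AVX has no
+// cross-lane byte shift, so each offset is emulated with a blend followed
+// by in-lane permutes and a 128-bit lane swap.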
+template<int Offset>
+struct palign_impl<Offset,Packet8f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm256_blend_ps(first, second, 1);
+ Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+ Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+ first = _mm256_blend_ps(tmp1, tmp2, 0x88);
+ }
+ else if (Offset==2)
+ {
+ first = _mm256_blend_ps(first, second, 3);
+ Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+ Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+ first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
+ }
+ else if (Offset==3)
+ {
+ first = _mm256_blend_ps(first, second, 7);
+ Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+ Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+ first = _mm256_blend_ps(tmp1, tmp2, 0xee);
+ }
+ else if (Offset==4)
+ {
+ first = _mm256_blend_ps(first, second, 15);
+ Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
+ Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+ first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
+ }
+ else if (Offset==5)
+ {
+ first = _mm256_blend_ps(first, second, 31);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0x88);
+ }
+ else if (Offset==6)
+ {
+ first = _mm256_blend_ps(first, second, 63);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0xcc);
+ }
+ else if (Offset==7)
+ {
+ first = _mm256_blend_ps(first, second, 127);
+ first = _mm256_permute2f128_ps(first, first, 1);
+ Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+ first = _mm256_permute2f128_ps(tmp, tmp, 1);
+ first = _mm256_blend_ps(tmp, first, 0xee);
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm256_blend_pd(first, second, 1);
+ __m256d tmp = _mm256_permute_pd(first, 5);
+ first = _mm256_permute2f128_pd(tmp, tmp, 1);
+ first = _mm256_blend_pd(tmp, first, 0xA);
+ }
+ else if (Offset==2)
+ {
+ first = _mm256_blend_pd(first, second, 3);
+ first = _mm256_permute2f128_pd(first, first, 1);
+ }
+ else if (Offset==3)
+ {
+ first = _mm256_blend_pd(first, second, 7);
+ __m256d tmp = _mm256_permute_pd(first, 5);
+ first = _mm256_permute2f128_pd(tmp, tmp, 1);
+ first = _mm256_blend_pd(tmp, first, 5);
+ }
+ }
+};
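+// 8x8 float transpose: unpacklo/unpackhi interleave row pairs within each
+// 128-bit lane, the shuffles gather 4-element column fragments, and the
+// final permute2f128 calls stitch low/high lanes into full output rows.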
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,8>& kernel) {
+ __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
+ __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
+ __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
+ __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
+ __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+ __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+ __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+ __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+ __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
+ __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
+ __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
+ __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
+ kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
+ kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
+ kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
+ kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
+ kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
+ kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
+ kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
+ kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,4>& kernel) {
+ __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+ __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+ __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+ __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+ __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+ __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+ kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
+ kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
+ kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
+ kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4d,4>& kernel) {
+ __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
+ __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
+ __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
+ __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
+ kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
+ kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
+ kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
+ kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
+}
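+// pblend: the integer selector is materialized as a float/double vector
+// and compared against zero, giving a full-width mask of the lanes where
+// the selector is 0; blendv then picks elsePacket in those lanes and
+// thenPacket everywhere else.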
+template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
+ const __m256 zero = _mm256_setzero_ps();
+ const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
+ return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
+ const __m256d zero = _mm256_setzero_pd();
+ const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
+ return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
+}
+template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/PacketMath.h"
+// #include "src/Core/arch/AVX/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
+#define EIGEN_MATH_FUNCTIONS_AVX_H
+namespace Eigen {
+namespace internal {
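+// AVX1 lacks 256-bit integer shifts (they arrived with AVX2), so without
+// EIGEN_VECTORIZE_AVX2 the vector is split into two 128-bit halves,
+// shifted with SSE2 intrinsics, and reassembled.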
+inline Packet8i pshiftleft(Packet8i v, int n)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+ return _mm256_slli_epi32(v, n);
+#else
+ __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
+ __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
+ return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
+#endif
+}
+inline Packet8f pshiftright(Packet8f v, int n)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+ return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
+#else
+ __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
+ __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
+ return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
+#endif
+}
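+// Vectorized sin: reduce the argument by multiples of pi using a
+// three-term split of pi for accuracy, map it to z = x*4/pi, evaluate two
+// minimax polynomials (one around 0, one around z = 2), select between
+// them with ival_mask, and flip the sign when the reduction count is odd.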
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+psin<Packet8f>(const Packet8f& _x) {
+ Packet8f x = _x;
+ _EIGEN_DECLARE_CONST_Packet8i(one, 1);
+ _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
+ _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
+ _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
+ _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
+ _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
+ _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);
+ Packet8f z = pmul(x, p8f_one_over_pi);
+ Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four));
+ x = pmadd(shift, p8f_neg_pi_first, x);
+ x = pmadd(shift, p8f_neg_pi_second, x);
+ x = pmadd(shift, p8f_neg_pi_third, x);
+ z = pmul(x, p8f_four_over_pi);
+ Packet8i shift_ints = _mm256_cvtps_epi32(shift);
+ Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
+ Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
+ Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
+ Packet8f z_minus_two = psub(z, p8f_two);
+ Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
+ Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
+ right = pmadd(right, z_minus_two2, p8f_coeff_right_2);
+ right = pmadd(right, z_minus_two2, p8f_coeff_right_0);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
+ _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
+ Packet8f z2 = pmul(z, z);
+ Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
+ left = pmadd(left, z2, p8f_coeff_left_3);
+ left = pmadd(left, z2, p8f_coeff_left_1);
+ left = pmul(left, z);
+ left = _mm256_andnot_ps(ival_mask, left);
+ right = _mm256_and_ps(ival_mask, right);
+ Packet8f res = _mm256_or_ps(left, right);
+ res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
+ return res;
+}
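+// Vectorized log, Cephes style: x is split as m * 2^e (the mantissa bits
+// are OR'ed with 0.5, hence the exponent bias of 126 rather than 127),
+// m is brought into [sqrt(0.5), sqrt(2)) via the SQRTHF comparison, a
+// degree-8 polynomial approximates the log of the reduced mantissa, and
+// e*ln(2) is re-added as two constants (q1, q2) to limit rounding error.
+// Negative/NaN inputs yield NaN and zeros yield -inf through the masks.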
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+plog<Packet8f>(const Packet8f& _x) {
+ Packet8f x = _x;
+ _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);
+ Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ);
+ Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
+ x = pmax(x, p8f_min_norm_pos);
+ Packet8f emm0 = pshiftright(x,23);
+ Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
+ x = _mm256_and_ps(x, p8f_inv_mant_mask);
+ x = _mm256_or_ps(x, p8f_half);
+ Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
+ Packet8f tmp = _mm256_and_ps(x, mask);
+ x = psub(x, p8f_1);
+ e = psub(e, _mm256_and_ps(p8f_1, mask));
+ x = padd(x, tmp);
+ Packet8f x2 = pmul(x, x);
+ Packet8f x3 = pmul(x2, x);
+ Packet8f y, y1, y2;
+ y = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
+ y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
+ y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
+ y = pmadd(y, x, p8f_cephes_log_p2);
+ y1 = pmadd(y1, x, p8f_cephes_log_p5);
+ y2 = pmadd(y2, x, p8f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+ y1 = pmul(e, p8f_cephes_log_q1);
+ tmp = pmul(x2, p8f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p8f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+ return _mm256_or_ps(
+ _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
+ _mm256_and_ps(iszero_mask, p8f_minus_inf));
+}
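+// Vectorized exp: clamp x, split it as x = m*ln(2) + r with
+// m = floor(x/ln(2) + 0.5), approximate e^r with a degree-5 polynomial,
+// and scale by 2^m, built by adding the bias 127 and shifting the integer
+// into the float exponent field. The final pmax propagates +inf for very
+// large inputs.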
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+pexp<Packet8f>(const Packet8f& _x) {
+ _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet8f(127, 127.0f);
+ _EIGEN_DECLARE_CONST_Packet8f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet8f(exp_lo, -88.3762626647949f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);
+ Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);
+ Packet8f m = _mm256_floor_ps(pmadd(x, p8f_cephes_LOG2EF, p8f_half));
+#ifdef EIGEN_VECTORIZE_FMA
+ _EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
+ Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
+#else
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
+ Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
+ r = psub(r, pmul(m, p8f_cephes_exp_C2));
+#endif
+ Packet8f r2 = pmul(r, r);
+ Packet8f y = p8f_cephes_exp_p0;
+ y = pmadd(y, r, p8f_cephes_exp_p1);
+ y = pmadd(y, r, p8f_cephes_exp_p2);
+ y = pmadd(y, r, p8f_cephes_exp_p3);
+ y = pmadd(y, r, p8f_cephes_exp_p4);
+ y = pmadd(y, r, p8f_cephes_exp_p5);
+ y = pmadd(y, r2, r);
+ y = padd(y, p8f_1);
+ Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
+ emm0 = pshiftleft(emm0, 23);
+ return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
+}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+ptanh<Packet8f>(const Packet8f& x) {
+ return internal::generic_fast_tanh_float(x);
+}
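+// Double-precision exp uses a Pade-style rational approximation
+//   e^r ~= 1 + 2*px / (qx - px),  px = r*P(r^2),  qx = Q(r^2),
+// after the same ln(2)-based range reduction. 2^m is assembled per 64-bit
+// lane by adding the bias 1023 and shifting into the exponent bits
+// (bit 52), one 128-bit half at a time.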
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
+pexp<Packet4d>(const Packet4d& _x) {
+ Packet4d x = _x;
+ _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
+ _EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
+ _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
+ _EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
+ _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+ Packet4d tmp, fx;
+ x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
+ fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
+ fx = _mm256_floor_pd(fx);
+ tmp = pmul(fx, p4d_cephes_exp_C1);
+ Packet4d z = pmul(fx, p4d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ Packet4d x2 = pmul(x, x);
+ Packet4d px = p4d_cephes_exp_p0;
+ px = pmadd(px, x2, p4d_cephes_exp_p1);
+ px = pmadd(px, x2, p4d_cephes_exp_p2);
+ px = pmul(px, x);
+ Packet4d qx = p4d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p4d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p4d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p4d_cephes_exp_q3);
+ x = _mm256_div_pd(px, psub(qx, px));
+ x = pmadd(p4d_2, x, p4d_1);
+ __m128i emm0 = _mm256_cvtpd_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_1023);
+ emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
+ __m128i lo = _mm_slli_epi64(emm0, 52);
+ __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
+ __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
+ e = _mm256_insertf128_si256(e, hi, 1);
+ return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
+}
+#if EIGEN_FAST_MATH
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+psqrt<Packet8f>(const Packet8f& _x) {
+ Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
+ Packet8f denormal_mask = _mm256_and_ps(
+ _mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
+ _CMP_LT_OQ),
+ _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
+ Packet8f x = _mm256_rsqrt_ps(_x);
+ x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
+ return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
+}
+#else
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f psqrt<Packet8f>(const Packet8f& x) {
+ return _mm256_sqrt_ps(x);
+}
+#endif
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4d psqrt<Packet4d>(const Packet4d& x) {
+ return _mm256_sqrt_pd(x);
+}
+#if EIGEN_FAST_MATH
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
+ Packet8f neg_half = pmul(_x, p8f_minus_half);
+ Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
+ Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
+ Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
+ Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
+ Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
+ _mm256_and_ps(zero_mask, p8f_inf));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
+ return _mm256_or_ps(x, infs_and_nans);
+}
+#else
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f prsqrt<Packet8f>(const Packet8f& x) {
+ _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+ return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
+}
+#endif
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4d prsqrt<Packet4d>(const Packet4d& x) {
+ _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
+ return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/MathFunctions.h"
+// #include "src/Core/arch/AVX/Complex.h"
+#ifndef EIGEN_COMPLEX_AVX_H
+#define EIGEN_COMPLEX_AVX_H
+namespace Eigen {
+namespace internal {
+struct Packet4cf
+{
+ EIGEN_STRONG_INLINE Packet4cf() {}
+ EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
+ __m256 v;
+};
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet4cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
+template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
+{
+ return Packet4cf(pnegate(a.v));
+}
+template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
+{
+ const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet4cf(_mm256_xor_ps(a.v,mask));
+}
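+// Complex multiply (a+bi)(c+di): moveldup broadcasts the real parts of a,
+// movehdup its imaginary parts, the second operand is swapped pairwise,
+// and addsub produces ac - bd in the even (real) lanes and ad + bc in the
+// odd (imaginary) lanes.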
+template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
+{
+ __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
+ __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
+ __m256 result = _mm256_addsub_ps(tmp1, tmp2);
+ return Packet4cf(result);
+}
+template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
+{
+ return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
+}
+template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
+{
+ Packet2cf a = ploaddup<Packet2cf>(from);
+ Packet2cf b = ploaddup<Packet2cf>(from+1);
+ return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
+}
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
+template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, Index stride)
+{
+ return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
+ std::imag(from[2*stride]), std::real(from[2*stride]),
+ std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index stride)
+{
+ __m128 low = _mm256_extractf128_ps(from.v, 0);
+ to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
+ to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
+ __m128 high = _mm256_extractf128_ps(from.v, 1);
+ to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
+ to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
+{
+ return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
+}
+template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
+ __m128 low = _mm256_extractf128_ps(a.v, 0);
+ __m128 high = _mm256_extractf128_ps(a.v, 1);
+ __m128d lowd = _mm_castps_pd(low);
+ __m128d highd = _mm_castps_pd(high);
+ low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
+ high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
+ __m256 result = _mm256_setzero_ps();
+ result = _mm256_insertf128_ps(result, low, 1);
+ result = _mm256_insertf128_ps(result, high, 0);
+ return Packet4cf(result);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
+{
+ return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
+ Packet2cf(_mm256_extractf128_ps(a.v,1))));
+}
+template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
+{
+ Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ t0 = _mm256_hadd_ps(t0,t1);
+ Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
+ t2 = _mm256_hadd_ps(t2,t3);
+ t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
+ t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
+ return Packet4cf(_mm256_add_ps(t1,t3));
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
+{
+ return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
+ Packet2cf(_mm256_extractf128_ps(a.v, 1))));
+}
+template<int Offset>
+struct palign_impl<Offset,Packet4cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
+ {
+ if (Offset==0) return;
+ palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
+ }
+};
+template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+template<> struct conj_helper<Packet8f, Packet4cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
+ { return Packet4cf(Eigen::internal::pmul(x, y.v)); }
+};
+template<> struct conj_helper<Packet4cf, Packet8f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
+ { return Packet4cf(Eigen::internal::pmul(x.v, y)); }
+};
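+// Complex division as a * conj(b) / |b|^2: the shuffle swaps each re/im
+// pair so that tmp + tmp2 holds b.re^2 + b.im^2 duplicated across both
+// lanes of every complex slot.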
+template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
+{
+ Packet4cf num = pmul(a, pconj(b));
+ __m256 tmp = _mm256_mul_ps(b.v, b.v);
+ __m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
+ __m256 denom = _mm256_add_ps(tmp, tmp2);
+ return Packet4cf(_mm256_div_ps(num.v, denom));
+}
+template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
+{
+ return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
+}
+struct Packet2cd
+{
+ EIGEN_STRONG_INLINE Packet2cd() {}
+ EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
+ __m256d v;
+};
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet2cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 2,
+ HasHalfPacket = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
+template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
+{
+ const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
+ return Packet2cd(_mm256_xor_pd(a.v,mask));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
+{
+ __m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
+ __m256d even = _mm256_mul_pd(tmp1, b.v);
+ __m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
+ __m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
+ __m256d odd = _mm256_mul_pd(tmp2, tmp3);
+ return Packet2cd(_mm256_addsub_pd(even, odd));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
+{
+ return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, Index stride)
+{
+ return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, Index stride)
+{
+ __m128d low = _mm256_extractf128_pd(from.v, 0);
+ to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
+ __m128d high = _mm256_extractf128_pd(from.v, 1);
+ to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
+{
+ __m128d low = _mm256_extractf128_pd(a.v, 0);
+ EIGEN_ALIGN16 double res[2];
+ _mm_store_pd(res, low);
+ return std::complex<double>(res[0],res[1]);
+}
+template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
+ __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
+ return Packet2cd(result);
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
+{
+ return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
+ Packet1cd(_mm256_extractf128_pd(a.v,1))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
+{
+ Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
+ Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
+ return Packet2cd(_mm256_add_pd(t0,t1));
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
+{
+ return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
+ Packet1cd(_mm256_extractf128_pd(a.v,1))));
+}
+template<int Offset>
+struct palign_impl<Offset,Packet2cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
+ {
+ if (Offset==0) return;
+ palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
+ }
+};
+template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+template<> struct conj_helper<Packet4d, Packet2cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
+ { return Packet2cd(Eigen::internal::pmul(x, y.v)); }
+};
+template<> struct conj_helper<Packet2cd, Packet4d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
+ { return Packet2cd(Eigen::internal::pmul(x.v, y)); }
+};
+template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
+{
+ Packet2cd num = pmul(a, pconj(b));
+ __m256d tmp = _mm256_mul_pd(b.v, b.v);
+ __m256d denom = _mm256_hadd_pd(tmp, tmp);
+ return Packet2cd(_mm256_div_pd(num.v, denom));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
+{
+ return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4cf,4>& kernel) {
+ __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
+ __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
+ __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
+ __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
+ __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
+ __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
+ __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
+ __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
+ kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
+ kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
+ kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
+ kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cd,2>& kernel) {
+ __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
+ kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
+ kernel.packet[0].v = tmp;
+}
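+// pinsertfirst/pinsertlast replace the first/last complex element; the blend
+// immediate selects the two scalar lanes occupied by that element.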
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
+{
+ return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
+{
+ return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
+}
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
+{
+ return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
+}
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
+{
+ return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/Complex.h"
+// #include "src/Core/arch/AVX/TypeCasting.h"
+#ifndef EIGEN_TYPE_CASTING_AVX_H
+#define EIGEN_TYPE_CASTING_AVX_H
+namespace Eigen {
+namespace internal {
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 0,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
+ return _mm256_cvtps_epi32(a);
+}
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
+ return _mm256_cvtepi32_ps(a);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/AVX/TypeCasting.h"
+#elif defined EIGEN_VECTORIZE_SSE
+// #include "src/Core/arch/SSE/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_SSE_H
+#define EIGEN_PACKET_MATH_SSE_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
+#endif
+#endif
+#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
+template<typename T>
+struct eigen_packet_wrapper
+{
+ EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
+ EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
+ EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
+ m_val = v;
+ return *this;
+ }
+ T m_val;
+};
+typedef eigen_packet_wrapper<__m128> Packet4f;
+typedef eigen_packet_wrapper<__m128i> Packet4i;
+typedef eigen_packet_wrapper<__m128d> Packet2d;
+#else
+typedef __m128 Packet4f;
+typedef __m128i Packet4i;
+typedef __m128d Packet2d;
+#endif
+template<> struct is_arithmetic<__m128> { enum { value = true }; };
+template<> struct is_arithmetic<__m128i> { enum { value = true }; };
+template<> struct is_arithmetic<__m128d> { enum { value = true }; };
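+// Lane-permutation helpers: the swizzle1 macros return the source with lanes
+// reordered to (v[p],v[q],...); the shuffle immediate packs the indices as
+// s<<6 | r<<4 | q<<2 | p.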
+#define vec4f_swizzle1(v,p,q,r,s) \
+ (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
+#define vec4i_swizzle1(v,p,q,r,s) \
+ (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec2d_swizzle1(v,p,q) \
+ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
+#define vec4f_swizzle2(a,b,p,q,r,s) \
+ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
+#define vec4i_swizzle2(a,b,p,q,r,s) \
+ (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ const Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasSin = EIGEN_FAST_MATH,
+ HasCos = EIGEN_FAST_MATH,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasTanh = EIGEN_FAST_MATH,
+ HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ ,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasBlend = 1
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ ,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1
+#endif
+ };
+};
+#endif
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasBlend = 1
+ };
+};
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+#if EIGEN_COMP_MSVC==1500
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
+#endif
+#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
+template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
+ return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
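+// Negation XORs the IEEE sign bit for float/double; integers fall back to 0 - a.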
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+ return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
+{
+ return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
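+// 32-bit integer multiply: SSE4.1 provides _mm_mullo_epi32; the SSE2 fallback
+// does two 32x32->64 bit _mm_mul_epu32 on the even/odd lanes and re-interleaves
+// the low halves of the four products.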
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_mullo_epi32(a,b);
+#else
+ return vec4i_swizzle1(
+ vec4i_swizzle2(
+ _mm_mul_epu32(a,b),
+ _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
+ vec4i_swizzle1(b,1,0,3,2)),
+ 0,2,0,2),
+ 0,2,1,3);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
+ Packet4i mask = _mm_cmplt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
+ Packet4i mask = _mm_cmpgt_epi32(a,b);
+ return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+#if EIGEN_COMP_MSVC
+ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ #if (EIGEN_COMP_MSVC==1600)
+ __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
+ res = _mm_loadh_pi(res, (const __m64*)(from+2));
+ return res;
+ #else
+ return _mm_loadu_ps(from);
+ #endif
+ }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_ps(from);
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_pd(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+}
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{ return pset1<Packet2d>(from[0]); }
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i tmp;
+ tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
+ return vec4i_swizzle1(tmp, 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ return _mm_set_pd(from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ to[stride*0] = _mm_cvtss_f32(from);
+ to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+ to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+ to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ to[stride*0] = _mm_cvtsd_f64(from);
+ to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ to[stride*0] = _mm_cvtsi128_si32(from);
+ to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+ to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+ to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
+{
+ Packet4f pa = _mm_set_ss(a);
+ pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
+{
+ Packet2d pa = _mm_set_sd(a);
+ pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
+}
+#ifndef EIGEN_VECTORIZE_AVX
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#endif
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif EIGEN_COMP_MSVC_STRICT
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); }
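+// pabs clears the sign bit by ANDing with a 0x7FFFFFFF-style mask; the SSE2
+// integer fallback computes (a ^ (a >> 31)) - (a >> 31).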
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+ return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+ return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+ #ifdef EIGEN_VECTORIZE_SSSE3
+ return _mm_abs_epi32(a);
+ #else
+ Packet4i aux = _mm_srai_epi32(a,31);
+ return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+ #endif
+}
+#ifndef __AVX__
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec4f_swizzle1(a3, 0,0,0,0);
+ a1 = vec4f_swizzle1(a3, 1,1,1,1);
+ a2 = vec4f_swizzle1(a3, 2,2,2,2);
+ a3 = vec4f_swizzle1(a3, 3,3,3,3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+#ifdef EIGEN_VECTORIZE_SSE3
+ a0 = _mm_loaddup_pd(a+0);
+ a1 = _mm_loaddup_pd(a+1);
+ a2 = _mm_loaddup_pd(a+2);
+ a3 = _mm_loaddup_pd(a+3);
+#else
+ a1 = pload<Packet2d>(a);
+ a0 = vec2d_swizzle1(a1, 0,0);
+ a1 = vec2d_swizzle1(a1, 1,1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec2d_swizzle1(a3, 0,0);
+ a3 = vec2d_swizzle1(a3, 1,1);
+#endif
+}
+#endif
+EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
+{
+ vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
+ vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
+ vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
+ vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
+}
+#ifdef EIGEN_VECTORIZE_SSE3
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_hadd_pd(vecs[0], vecs[1]);
+}
+#else
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ Packet4f tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
+ tmp0 = _mm_add_ps(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
+ tmp1 = _mm_add_ps(tmp1, tmp2);
+ tmp2 = _mm_movehl_ps(tmp1, tmp0);
+ tmp0 = _mm_movelh_ps(tmp0, tmp1);
+ return _mm_add_ps(tmp0, tmp2);
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
+}
+#endif
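+// Horizontal sum: fold the upper half onto the lower half, then add the last pair.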
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+}
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp0 = _mm_hadd_epi32(a,a);
+ return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+ return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i tmp0, tmp1, tmp2;
+ tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+ tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+ tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+ tmp0 = _mm_add_epi32(tmp0, tmp1);
+ tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+ tmp1 = _mm_add_epi32(tmp1, tmp2);
+ tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
+ tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
+ return _mm_add_epi32(tmp0, tmp2);
+}
+#endif
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ return (aux[0] * aux[1]) * (aux[2] * aux[3]);
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+ return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+ int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+ return aux0<aux2 ? aux0 : aux2;
+#endif
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+ return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+ return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+ int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+ return aux0>aux2 ? aux0 : aux2;
+#endif
+}
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset!=0)
+ first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset!=0)
+ first = _mm_alignr_epi8(second,first, Offset*4);
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset==1)
+ first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
+ }
+};
+#else
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_move_ss(first,second);
+ first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
+ }
+ else if (Offset==2)
+ {
+ first = _mm_movehl_ps(first,first);
+ first = _mm_movelh_ps(first,second);
+ }
+ else if (Offset==3)
+ {
+ first = _mm_move_ss(first,second);
+ first = _mm_shuffle_ps(first,second,0x93);
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ first = _mm_shuffle_epi32(first,0x39);
+ }
+ else if (Offset==2)
+ {
+ first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
+ first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ }
+ else if (Offset==3)
+ {
+ first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+ first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset==1)
+ {
+ first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
+ first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
+ }
+ }
+};
+#endif
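+// In-register 4x4/2x2 transposes built from unpacklo/unpackhi interleaves
+// (the float case reuses the _MM_TRANSPOSE4_PS macro).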
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[1] = tmp;
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
+ __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
+ __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
+ __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
+ kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
+ kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
+ kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
+ kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
+}
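+// pblend turns the integer selector into a comparison mask (select[i]==0 picks
+// elsePacket) and applies it with blendv on SSE4.1, or and/andnot/or otherwise.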
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128i false_mask = _mm_cmpeq_epi32(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+ const __m128 zero = _mm_setzero_ps();
+ const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+ __m128 false_mask = _mm_cmpeq_ps(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ const __m128d zero = _mm_setzero_pd();
+ const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
+ __m128d false_mask = _mm_cmpeq_pd(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
+#else
+ return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),1);
+#else
+ return _mm_move_ss(a, _mm_load_ss(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),1);
+#else
+ return _mm_move_sd(a, _mm_load_sd(&b));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+ return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+ return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
+ return ::fmaf(a,b,c);
+}
+template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
+ return ::fma(a,b,c);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/PacketMath.h"
+// #include "src/Core/arch/SSE/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
+#define EIGEN_MATH_FUNCTIONS_SSE_H
+namespace Eigen {
+namespace internal {
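+// Cephes-style logf: split x into exponent e and mantissa m near 1, evaluate a
+// degree-8 polynomial in m-1, then add e*ln(2) split into two constants
+// (q1, q2) for extra precision. x < 0 yields NaN, x == 0 yields -inf.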
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+ Packet4i emm0;
+ Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps());
+ Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
+ x = pmax(x, p4f_min_norm_pos);
+ emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
+ x = _mm_and_ps(x, p4f_inv_mant_mask);
+ x = _mm_or_ps(x, p4f_half);
+ emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
+ Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
+ Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
+ Packet4f tmp = pand(x, mask);
+ x = psub(x, p4f_1);
+ e = psub(e, pand(p4f_1, mask));
+ x = padd(x, tmp);
+ Packet4f x2 = pmul(x,x);
+ Packet4f x3 = pmul(x2,x);
+ Packet4f y, y1, y2;
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+ y = pmadd(y , x, p4f_cephes_log_p2);
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+ y1 = pmul(e, p4f_cephes_log_q1);
+ tmp = pmul(x2, p4f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p4f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+ return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+ _mm_and_ps(iszero_mask, p4f_minus_inf));
+}
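+// Cephes expf: clamp, split x = n*ln(2) + r with n = floor(x*log2(e) + 0.5),
+// approximate e^r with a degree-5 polynomial, then scale by 2^n by building the
+// biased exponent (n + 127) << 23 directly in the float bit pattern.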
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+ Packet4f tmp, fx;
+ Packet4i emm0;
+ x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+ fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ fx = _mm_floor_ps(fx);
+#else
+ emm0 = _mm_cvttps_epi32(fx);
+ tmp = _mm_cvtepi32_ps(emm0);
+ Packet4f mask = _mm_cmpgt_ps(tmp, fx);
+ mask = _mm_and_ps(mask, p4f_1);
+ fx = psub(tmp, mask);
+#endif
+ tmp = pmul(fx, p4f_cephes_exp_C1);
+ Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ z = pmul(x,x);
+ Packet4f y = p4f_cephes_exp_p0;
+ y = pmadd(y, x, p4f_cephes_exp_p1);
+ y = pmadd(y, x, p4f_cephes_exp_p2);
+ y = pmadd(y, x, p4f_cephes_exp_p3);
+ y = pmadd(y, x, p4f_cephes_exp_p4);
+ y = pmadd(y, x, p4f_cephes_exp_p5);
+ y = pmadd(y, z, x);
+ y = padd(y, p4f_1);
+ emm0 = _mm_cvttps_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_0x7f);
+ emm0 = _mm_slli_epi32(emm0, 23);
+ return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
+}
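+// Double-precision exp uses the same range reduction but a Pade-style rational
+// form e^r ~ 1 + 2*px/(qx - px); 2^n is injected through the 11-bit exponent
+// field, hence the << 20 and the lane shuffle placing each n in a high dword.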
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+ _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+ _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+ _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+ _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+ _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+ static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
+ Packet2d tmp, fx;
+ Packet4i emm0;
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ fx = _mm_floor_pd(fx);
+#else
+ emm0 = _mm_cvttpd_epi32(fx);
+ tmp = _mm_cvtepi32_pd(emm0);
+ Packet2d mask = _mm_cmpgt_pd(tmp, fx);
+ mask = _mm_and_pd(mask, p2d_1);
+ fx = psub(tmp, mask);
+#endif
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ Packet2d x2 = pmul(x,x);
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+ emm0 = _mm_cvttpd_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, p4i_1023_0);
+ emm0 = _mm_slli_epi32(emm0, 20);
+ emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
+ return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
+}
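+// Cephes sinf: reduce |x| modulo pi/4 using a three-part split of pi/4
+// (DP1..DP3) to limit cancellation, select the sin or cos polynomial per
+// octant via emm2, and restore the sign at the end. pcos below uses the same
+// scheme with the octant offset shifted by two.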
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psin<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
+ Packet4f xmm1, xmm2, xmm3, sign_bit, y;
+ Packet4i emm0, emm2;
+ sign_bit = x;
+ x = pabs(x);
+ sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
+ y = pmul(x, p4f_cephes_FOPI);
+ emm2 = _mm_cvttps_epi32(y);
+ emm2 = _mm_add_epi32(emm2, p4i_1);
+ emm2 = _mm_and_si128(emm2, p4i_not1);
+ y = _mm_cvtepi32_ps(emm2);
+ emm0 = _mm_and_si128(emm2, p4i_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ emm2 = _mm_and_si128(emm2, p4i_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
+ Packet4f poly_mask = _mm_castsi128_ps(emm2);
+ sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
+ x = padd(x, xmm1);
+ x = padd(x, xmm2);
+ x = padd(x, xmm3);
+ y = p4f_coscof_p0;
+ Packet4f z = _mm_mul_ps(x,x);
+ y = pmadd(y, z, p4f_coscof_p1);
+ y = pmadd(y, z, p4f_coscof_p2);
+ y = pmul(y, z);
+ y = pmul(y, z);
+ Packet4f tmp = pmul(z, p4f_half);
+ y = psub(y, tmp);
+ y = padd(y, p4f_1);
+ Packet4f y2 = p4f_sincof_p0;
+ y2 = pmadd(y2, z, p4f_sincof_p1);
+ y2 = pmadd(y2, z, p4f_sincof_p2);
+ y2 = pmul(y2, z);
+ y2 = pmul(y2, x);
+ y2 = padd(y2, x);
+ y2 = _mm_and_ps(poly_mask, y2);
+ y = _mm_andnot_ps(poly_mask, y);
+ y = _mm_or_ps(y,y2);
+ return _mm_xor_ps(y, sign_bit);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pcos<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+ _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+ _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+ _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+ _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);
+ Packet4f xmm1, xmm2, xmm3, y;
+ Packet4i emm0, emm2;
+ x = pabs(x);
+ y = pmul(x, p4f_cephes_FOPI);
+ emm2 = _mm_cvttps_epi32(y);
+ emm2 = _mm_add_epi32(emm2, p4i_1);
+ emm2 = _mm_and_si128(emm2, p4i_not1);
+ y = _mm_cvtepi32_ps(emm2);
+ emm2 = _mm_sub_epi32(emm2, p4i_2);
+ emm0 = _mm_andnot_si128(emm2, p4i_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ emm2 = _mm_and_si128(emm2, p4i_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ Packet4f sign_bit = _mm_castsi128_ps(emm0);
+ Packet4f poly_mask = _mm_castsi128_ps(emm2);
+ xmm1 = pmul(y, p4f_minus_cephes_DP1);
+ xmm2 = pmul(y, p4f_minus_cephes_DP2);
+ xmm3 = pmul(y, p4f_minus_cephes_DP3);
+ x = padd(x, xmm1);
+ x = padd(x, xmm2);
+ x = padd(x, xmm3);
+ y = p4f_coscof_p0;
+ Packet4f z = pmul(x,x);
+ y = pmadd(y,z,p4f_coscof_p1);
+ y = pmadd(y,z,p4f_coscof_p2);
+ y = pmul(y, z);
+ y = pmul(y, z);
+ Packet4f tmp = _mm_mul_ps(z, p4f_half);
+ y = psub(y, tmp);
+ y = padd(y, p4f_1);
+ Packet4f y2 = p4f_sincof_p0;
+ y2 = pmadd(y2, z, p4f_sincof_p1);
+ y2 = pmadd(y2, z, p4f_sincof_p2);
+ y2 = pmul(y2, z);
+ y2 = pmadd(y2, x, x);
+ y2 = _mm_and_ps(poly_mask, y2);
+ y = _mm_andnot_ps(poly_mask, y);
+ y = _mm_or_ps(y,y2);
+ return _mm_xor_ps(y, sign_bit);
+}
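+// Fast sqrt: refine the hardware rsqrt estimate with one Newton-Raphson step
+// x*(1.5 - 0.5*a*x^2) and multiply by a; zero/denormal inputs are masked to 0
+// so the rsqrt estimate cannot blow up. prsqrt below patches negatives to NaN
+// and (sub)normal-or-zero inputs to +inf.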
+#if EIGEN_FAST_MATH
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& _x)
+{
+ Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
+ Packet4f denormal_mask = _mm_and_ps(
+ _mm_cmpge_ps(_x, _mm_setzero_ps()),
+ _mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
+ Packet4f x = _mm_rsqrt_ps(_x);
+ x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
+ return _mm_andnot_ps(denormal_mask, pmul(_x,x));
+}
+#else
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+#endif
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+#if EIGEN_FAST_MATH
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
+ _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
+ _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
+ Packet4f neg_half = pmul(_x, p4f_minus_half);
+ Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
+ Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
+ Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
+ Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
+ Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
+ _mm_and_ps(zero_mask, p4f_inf));
+ x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
+ return _mm_or_ps(x, infs_and_nans);
+}
+#else
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+ return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
+}
+#endif
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+ return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
+}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
+ptanh<Packet4f>(const Packet4f& x) {
+ return internal::generic_fast_tanh_float(x);
+}
+}
+namespace numext {
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x)
+{
+ return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x)
+{
+#if EIGEN_COMP_GNUC_STRICT
+ return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
+#else
+ return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
+#endif
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/MathFunctions.h"
+// #include "src/Core/arch/SSE/Complex.h"
+#ifndef EIGEN_COMPLEX_SSE_H
+#define EIGEN_COMPLEX_SSE_H
+namespace Eigen {
+namespace internal {
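+// Packet2cf packs two complex<float> as [re0, im0, re1, im1] in one __m128;
+// Packet1cd further below holds a single complex<double> in one __m128d.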
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
+ __m128 v;
+};
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0,
+ HasBlend = 1
+ };
+};
+#endif
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_xor_ps(a.v,mask));
+}
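+// Complex multiply (ar+i*ai)*(br+i*bi): SSE3 combines ar*b and ai*swap(b) with
+// addsub in one step; the SSE2 path emulates addsub by sign-flipping the real
+// lanes of the cross term before adding.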
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
+ _mm_mul_ps(_mm_movehdup_ps(a.v),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
+ return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+ _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ Packet2cf res;
+#if EIGEN_GNUC_AT_MOST(4,2)
+ res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
+#elif EIGEN_GNUC_AT_LEAST(4,6)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wuninitialized"
+ res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+ #pragma GCC diagnostic pop
+#else
+ res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+#endif
+ return Packet2cf(_mm_movelh_ps(res.v,res.v));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+ return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
+ std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+ to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
+ _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
+ to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
+ _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
+}
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ #if EIGEN_GNUC_AT_MOST(4,3)
+ EIGEN_ALIGN16 std::complex<float> res[2];
+ _mm_store_ps((float*)res, a.v);
+ return res[0];
+ #else
+ std::complex<float> res;
+ _mm_storel_pi((__m64*)&res, a.v);
+ return res;
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
+}
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+ first.v = _mm_movehl_ps(first.v, first.v);
+ first.v = _mm_movelh_ps(first.v, second.v);
+ }
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+ _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+ _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+ _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+ vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
+ { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
+};
+template<> struct conj_helper<Packet2cf, Packet4f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
+ { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
+};
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ __m128 s = _mm_mul_ps(b.v,b.v);
+ return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
+}
+EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
+{
+ return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
+}
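+// Packet1cd: a single std::complex<double> held in one __m128d (real in the
+// low lane, imaginary in the high lane).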
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
+ __m128d v;
+};
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+#endif
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
+{
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_xor_pd(a.v,mask));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ EIGEN_ALIGN16 double res[2];
+ _mm_store_pd(res, a.v);
+ return std::complex<double>(res[0],res[1]);
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(a, pconj(b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return internal::pmul(pconj(a), b);
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+ _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0)), mask)));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ #ifdef EIGEN_VECTORIZE_SSE3
+ return pconj(internal::pmul(a, b));
+ #else
+ const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+ _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+ vec2d_swizzle1(b.v, 1, 0))));
+ #endif
+ }
+};
+template<> struct conj_helper<Packet2d, Packet1cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
+};
+template<> struct conj_helper<Packet1cd, Packet2d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
+ { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
+};
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ __m128d s = _mm_mul_pd(b.v,b.v);
+ return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
+}
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
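+// 2x2 transpose of complex<float> packets: each complex is treated as one
+// 64-bit lane and swapped with the double-precision unpack instructions.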
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+ __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
+ __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
+ __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
+ kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
+ kernel.packet[1].v = tmp;
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
+ return Packet2cf(_mm_castpd_ps(result));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/Complex.h"
+// #include "src/Core/arch/SSE/TypeCasting.h"
+#ifndef EIGEN_TYPE_CASTING_SSE_H
+#define EIGEN_TYPE_CASTING_SSE_H
+namespace Eigen {
+namespace internal {
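+// Vectorized scalar conversions. SrcCoeffRatio/TgtCoeffRatio give how many
+// source coefficients feed one target coefficient: e.g. double->float consumes
+// two Packet2d per Packet4f, which is why that pcast takes two source packets.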
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
+ return _mm_cvttps_epi32(a);
+}
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
+ return _mm_cvtepi32_ps(a);
+}
+template <>
+struct type_casting_traits<double, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 2,
+ TgtCoeffRatio = 1
+ };
+};
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
+ return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
+}
+template <>
+struct type_casting_traits<float, double> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 2
+ };
+};
+template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
+ return _mm_cvtps_pd(a);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/SSE/TypeCasting.h"
+#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
+// #include "src/Core/arch/AltiVec/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_ALTIVEC_H
+#define EIGEN_PACKET_MATH_ALTIVEC_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
+#endif
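+// All AltiVec/VSX packets are 128-bit vector registers. Note that Packet8i,
+// despite its name, is a vector of eight 16-bit shorts.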
+typedef __vector float Packet4f;
+typedef __vector int Packet4i;
+typedef __vector unsigned int Packet4ui;
+typedef __vector __bool int Packet4bi;
+typedef __vector short int Packet8i;
+typedef __vector unsigned char Packet16uc;
+#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
+ Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
+#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = vec_splat_s32(X)
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = pset1<Packet2l>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
+#define DST_CHAN 1
+#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
+static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
+static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
+#ifndef __VSX__
+static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
+#endif
+static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+#ifdef __PPC64__
+#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
+#else
+#define _EIGEN_MASK_ALIGNMENT 0xfffffff0
+#endif
+#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
+#ifdef __VSX__
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#endif
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
+static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);
+#else
+static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
+static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8);
+#endif
+static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);
+static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);
+static Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16;
+static Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16;
+static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8);
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);
+#else
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8);
+#endif
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+ #define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#else
+ #define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#endif
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasMin = 1,
+ HasMax = 1,
+ HasAbs = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 1,
+#ifdef __VSX__
+ HasSqrt = 1,
+#if !EIGEN_COMP_CLANG
+ HasRsqrt = 1,
+#else
+ HasRsqrt = 0,
+#endif
+#else
+ HasSqrt = 0,
+ HasRsqrt = 0,
+#endif
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1,
+ HasNegate = 1,
+ HasBlend = 1
+ };
+};
+template<> struct packet_traits<int> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 0,
+ HasBlend = 1
+ };
+};
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
+{
+ union {
+ Packet16uc v;
+ unsigned char n[16];
+ } vt;
+ vt.v = v;
+ for (int i=0; i< 16; i++)
+ s << (int)vt.n[i] << ", ";
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
+{
+ union {
+ Packet4f v;
+ float n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
+{
+ union {
+ Packet4i v;
+ int n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
+{
+ union {
+ Packet4ui v;
+ unsigned int n[4];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+ return s;
+}
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+ return vec_vsx_ld(0, from);
+#else
+ return vec_ld(0, from);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+ return vec_vsx_ld(0, from);
+#else
+ return vec_ld(0, from);
+#endif
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+ vec_vsx_st(from, 0, to);
+#else
+ vec_st(from, 0, to);
+#endif
+}
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+ vec_vsx_st(from, 0, to);
+#else
+ vec_st(from, 0, to);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
+ Packet4f v = {from, from, from, from};
+ return v;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
+ Packet4i v = {from, from, from, from};
+ return v;
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+ Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+ a3 = pload<Packet4i>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ float EIGEN_ALIGN16 af[4];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ af[2] = from[2*stride];
+ af[3] = from[3*stride];
+ return pload<Packet4f>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ ai[0] = from[0*stride];
+ ai[1] = from[1*stride];
+ ai[2] = from[2*stride];
+ ai[3] = from[3*stride];
+ return pload<Packet4i>(ai);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ float EIGEN_ALIGN16 af[4];
+ pstore<float>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+ to[2*stride] = af[2];
+ to[3*stride] = af[3];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ pstore<int>((int *)ai, from);
+ to[0*stride] = ai[0];
+ to[1*stride] = ai[1];
+ to[2*stride] = ai[2];
+ to[3*stride] = ai[3];
+}
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return pset1<Packet4i>(a) + p4i_COUNTDOWN; }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return a + b; }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return a + b; }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return a - b; }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return a - b; }
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; }
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+#ifndef __VSX__
+ Packet4f t, y_0, y_1;
+ y_0 = vec_re(b);
+ t = vec_nmsub(y_0, b, p4f_ONE);
+ y_1 = vec_madd(y_0, t, y_0);
+ return vec_madd(a, y_1, p4f_MZERO);
+#else
+ return vec_div(a, b);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& , const Packet4i& )
+{ eigen_assert(false && "packet integer division is not supported by AltiVec");
+ return pset1<Packet4i>(0);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
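+// Unaligned access: big-endian AltiVec has no unaligned load, so the two
+// aligned quadwords spanning the address are merged with vec_perm/vec_lvsl;
+// VSX little-endian uses vec_vsx_ld, which tolerates misaligned pointers.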
+#ifdef _BIG_ENDIAN
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+ Packet16uc MSQ, LSQ;
+ Packet16uc mask;
+ MSQ = vec_ld(0, (unsigned char *)from);
+ LSQ = vec_ld(15, (unsigned char *)from);
+ mask = vec_lvsl(0, from);
+ return (Packet4f) vec_perm(MSQ, LSQ, mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+ Packet16uc MSQ, LSQ;
+ Packet16uc mask;
+ MSQ = vec_ld(0, (unsigned char *)from);
+ LSQ = vec_ld(15, (unsigned char *)from);
+ mask = vec_lvsl(0, from);
+ return (Packet4i) vec_perm(MSQ, LSQ, mask);
+}
+#else
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
+}
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ Packet4f p;
+ if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from);
+ else p = ploadu<Packet4f>(from);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i p;
+ if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from);
+ else p = ploadu<Packet4i>(from);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+#ifdef _BIG_ENDIAN
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ Packet16uc MSQ, LSQ, edges;
+ Packet16uc edgeAlign, align;
+ MSQ = vec_ld(0, (unsigned char *)to);
+ LSQ = vec_ld(15, (unsigned char *)to);
+ edgeAlign = vec_lvsl(0, to);
+ edges=vec_perm(LSQ,MSQ,edgeAlign);
+ align = vec_lvsr( 0, to );
+ MSQ = vec_perm(edges,(Packet16uc)from,align);
+ LSQ = vec_perm((Packet16uc)from,edges,align);
+ vec_st( LSQ, 15, (unsigned char *)to );
+ vec_st( MSQ, 0, (unsigned char *)to );
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_UNALIGNED_STORE
+ Packet16uc MSQ, LSQ, edges;
+ Packet16uc edgeAlign, align;
+ MSQ = vec_ld(0, (unsigned char *)to);
+ LSQ = vec_ld(15, (unsigned char *)to);
+ edgeAlign = vec_lvsl(0, to);
+ edges=vec_perm(LSQ, MSQ, edgeAlign);
+ align = vec_lvsr( 0, to );
+ MSQ = vec_perm(edges, (Packet16uc) from, align);
+ LSQ = vec_perm((Packet16uc) from, edges, align);
+ vec_st( LSQ, 15, (unsigned char *)to );
+ vec_st( MSQ, 0, (unsigned char *)to );
+}
+#else
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
+{
+  EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to));
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
+{
+  EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
+}
+#endif
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_PPC_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{
+ return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{
+  return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
+}
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
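+// Horizontal reductions: vec_sld rotates the packet by 8 then 4 bytes so that
+// two combine steps collapse all four lanes into lane 0.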
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, sum;
+ b = vec_sld(a, a, 8);
+ sum = a + b;
+ b = vec_sld(sum, sum, 4);
+ sum += b;
+ return pfirst(sum);
+}
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ Packet4f v[4], sum[4];
+ v[0] = vec_mergeh(vecs[0], vecs[2]);
+ v[1] = vec_mergel(vecs[0], vecs[2]);
+ v[2] = vec_mergeh(vecs[1], vecs[3]);
+ v[3] = vec_mergel(vecs[1], vecs[3]);
+ sum[0] = vec_mergeh(v[0], v[2]);
+ sum[1] = vec_mergel(v[0], v[2]);
+ sum[2] = vec_mergeh(v[1], v[3]);
+ sum[3] = vec_mergel(v[1], v[3]);
+ sum[0] = sum[0] + sum[1];
+ sum[1] = sum[2] + sum[3];
+ sum[0] = sum[0] + sum[1];
+ return sum[0];
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i sum;
+ sum = vec_sums(a, p4i_ZERO);
+#ifdef _BIG_ENDIAN
+ sum = vec_sld(sum, p4i_ZERO, 12);
+#else
+ sum = vec_sld(p4i_ZERO, sum, 4);
+#endif
+ return pfirst(sum);
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i v[4], sum[4];
+ v[0] = vec_mergeh(vecs[0], vecs[2]);
+ v[1] = vec_mergel(vecs[0], vecs[2]);
+ v[2] = vec_mergeh(vecs[1], vecs[3]);
+ v[3] = vec_mergel(vecs[1], vecs[3]);
+ sum[0] = vec_mergeh(v[0], v[2]);
+ sum[1] = vec_mergel(v[0], v[2]);
+ sum[2] = vec_mergeh(v[1], v[3]);
+ sum[3] = vec_mergel(v[1], v[3]);
+ sum[0] = sum[0] + sum[1];
+ sum[1] = sum[2] + sum[3];
+ sum[0] = sum[0] + sum[1];
+ return sum[0];
+}
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ Packet4f prod;
+ prod = pmul(a, vec_sld(a, a, 8));
+ return pfirst(pmul(prod, vec_sld(prod, prod, 4)));
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ return aux[0] * aux[1] * aux[2] * aux[3];
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, res;
+ b = vec_min(a, vec_sld(a, a, 8));
+ res = vec_min(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = vec_min(a, vec_sld(a, a, 8));
+ res = vec_min(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ Packet4f b, res;
+ b = vec_max(a, vec_sld(a, a, 8));
+ res = vec_max(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = vec_max(a, vec_sld(a, a, 8));
+ res = vec_max(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
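+// palign treats two consecutive packets as one 8-element window and shifts it
+// by Offset elements; the vec_sld byte counts are mirrored between endiannesses.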
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+#ifdef _BIG_ENDIAN
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+#else
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(second, first, 12); break;
+ case 2:
+ first = vec_sld(second, first, 8); break;
+ case 3:
+ first = vec_sld(second, first, 4); break;
+ }
+#endif
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+#ifdef _BIG_ENDIAN
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+#else
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(second, first, 12); break;
+ case 2:
+ first = vec_sld(second, first, 8); break;
+ case 3:
+ first = vec_sld(second, first, 4); break;
+ }
+#endif
+ }
+};
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ Packet4f t0, t1, t2, t3;
+ t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ Packet4i t0, t1, t2, t3;
+ t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+ Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+ Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+#ifdef __VSX__
+typedef __vector double Packet2d;
+typedef __vector unsigned long long Packet2ul;
+typedef __vector long long Packet2l;
+#if EIGEN_COMP_CLANG
+typedef Packet2ul Packet2bl;
+#else
+typedef __vector __bool long Packet2bl;
+#endif
+static Packet2l p2l_ONE = { 1, 1 };
+static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
+static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
+static Packet2d p2d_MZERO = { -0.0, -0.0 };
+#ifdef _BIG_ENDIAN
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
+#else
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8));
+#endif
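+// Broadcast one 64-bit lane of a Packet2d across both lanes via the PSET64
+// permute masks defined above.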
+template<int index> Packet2d vec_splat_dbl(Packet2d& a);
+template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a)
+{
+ return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_HI));
+}
+template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a)
+{
+ return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_LO));
+}
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasMin = 1,
+ HasMax = 1,
+ HasAbs = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1,
+ HasNegate = 1,
+ HasBlend = 1
+ };
+};
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
+{
+ union {
+ Packet2l v;
+ int64_t n[2];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+ union {
+ Packet2d v;
+ double n[2];
+ } vt;
+ vt.v = v;
+ s << vt.n[0] << ", " << vt.n[1];
+ return s;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+ return vec_vsx_ld(0, from);
+#else
+ return vec_ld(0, from);
+#endif
+}
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+ vec_vsx_st(from, 0, to);
+#else
+ vec_st(from, 0, to);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
+ Packet2d v = {from, from};
+ return v;
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+ a1 = pload<Packet2d>(a);
+ a0 = vec_splat_dbl<0>(a1);
+ a1 = vec_splat_dbl<1>(a1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec_splat_dbl<0>(a3);
+ a3 = vec_splat_dbl<1>(a3);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ pstore<double>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return pset1<Packet2d>(a) + p2d_COUNTDOWN; }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return a + b; }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+ return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from));
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ Packet2d p;
+ if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from);
+ else p = ploadu<Packet2d>(from);
+ return vec_splat_dbl<0>(p);
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from)
+{
+  EIGEN_DEBUG_UNALIGNED_STORE
+ vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
+}
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore<double>(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{
+ return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ Packet2d b, sum;
+ b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(a), reinterpret_cast<Packet4f>(a), 8));
+ sum = a + b;
+ return pfirst<Packet2d>(sum);
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ Packet2d v[2], sum;
+ v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
+ v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
+#ifdef _BIG_ENDIAN
+ sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
+#else
+ sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[1]), reinterpret_cast<Packet4f>(v[0]), 8));
+#endif
+ return sum;
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmin(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset == 1)
+#ifdef _BIG_ENDIAN
+ first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8));
+#else
+ first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8));
+#endif
+ }
+};
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ Packet2d t0, t1;
+ t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+ t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+ kernel.packet[0] = t0;
+ kernel.packet[1] = t1;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
+ Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/AltiVec/PacketMath.h"
+// #include "src/Core/arch/AltiVec/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+namespace Eigen {
+namespace internal {
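+// Polynomial and rational coefficients (taken from Cephes) used by the
+// vectorized log/exp implementations below.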
+static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+#ifdef __VSX__
+static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+#ifdef __POWER8_VECTOR__
+static Packet2l p2l_1023 = { 1023, 1023 };
+static Packet2ul p2ul_52 = { 52, 52 };
+#endif
+#endif
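+// plog, Cephes-style: split x into mantissa m in [sqrt(1/2), sqrt(2)) and
+// exponent e, evaluate a degree-8 polynomial in (m - 1), then add e*ln(2).
+// x == 0 returns -inf and x < 0 returns NaN via the select masks.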
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ Packet4i emm0;
+ Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
+ Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
+ x = pmax(x, p4f_min_norm_pos);
+ emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
+ reinterpret_cast<Packet4ui>(p4i_23));
+ x = pand(x, p4f_inv_mant_mask);
+ x = por(x, p4f_half);
+ emm0 = psub(emm0, p4i_0x7f);
+ Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
+ Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
+ Packet4f tmp = pand(x, mask);
+ x = psub(x, p4f_1);
+ e = psub(e, pand(p4f_1, mask));
+ x = padd(x, tmp);
+ Packet4f x2 = pmul(x,x);
+ Packet4f x3 = pmul(x2,x);
+ Packet4f y, y1, y2;
+ y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+ y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+ y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+ y = pmadd(y , x, p4f_cephes_log_p2);
+ y1 = pmadd(y1, x, p4f_cephes_log_p5);
+ y2 = pmadd(y2, x, p4f_cephes_log_p8);
+ y = pmadd(y, x3, y1);
+ y = pmadd(y, x3, y2);
+ y = pmul(y, x3);
+ y1 = pmul(e, p4f_cephes_log_q1);
+ tmp = pmul(x2, p4f_half);
+ y = padd(y, y1);
+ x = psub(x, tmp);
+ y2 = pmul(e, p4f_cephes_log_q2);
+ x = padd(x, y);
+ x = padd(x, y2);
+ x = vec_sel(x, p4f_minus_inf, iszero_mask);
+ x = vec_sel(p4f_minus_nan, x, isvalid_mask);
+ return x;
+}
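+// pexp: n = round(x * log2(e)); the remainder r = x - n*ln(2) (subtracted in
+// two steps for accuracy) feeds a degree-5 polynomial for e^r, and 2^n is
+// rebuilt by shifting the biased exponent into place; NaN inputs propagate
+// through isnumber_mask.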
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ Packet4f tmp, fx;
+ Packet4i emm0;
+ x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+ fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+ fx = pfloor(fx);
+ tmp = pmul(fx, p4f_cephes_exp_C1);
+ Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ z = pmul(x,x);
+ Packet4f y = p4f_cephes_exp_p0;
+ y = pmadd(y, x, p4f_cephes_exp_p1);
+ y = pmadd(y, x, p4f_cephes_exp_p2);
+ y = pmadd(y, x, p4f_cephes_exp_p3);
+ y = pmadd(y, x, p4f_cephes_exp_p4);
+ y = pmadd(y, x, p4f_cephes_exp_p5);
+ y = pmadd(y, z, x);
+ y = padd(y, p4f_1);
+ emm0 = vec_cts(fx, 0);
+ emm0 = vec_add(emm0, p4i_0x7f);
+ emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));
+ Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
+ return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
+ isnumber_mask);
+}
+#ifndef EIGEN_COMP_CLANG
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x)
+{
+ return vec_rsqrt(x);
+}
+#endif
+#ifdef __VSX__
+#ifndef EIGEN_COMP_CLANG
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x)
+{
+ return vec_rsqrt(x);
+}
+#endif
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x)
+{
+ return vec_sqrt(x);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x)
+{
+ return vec_sqrt(x);
+}
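+// Direct vector double->int64 conversion (vec_cts on doubles) needs a recent
+// GCC; older compilers round-trip through memory with scalar casts.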
+static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
+#if EIGEN_GNUC_AT_LEAST(5, 4) || \
+ (EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)
+ return vec_cts(x, 0);
+#else
+ double tmp[2];
+ memcpy(tmp, &x, sizeof(tmp));
+ Packet2l l = { static_cast<long long>(tmp[0]),
+ static_cast<long long>(tmp[1]) };
+ return l;
+#endif
+}
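+// Double-precision exp uses the Cephes rational form e^r = 1 + 2*p/(q - p)
+// with p = r*P(r^2) and q = Q(r^2); 2^n is assembled in the exponent field
+// (64-bit shifts on POWER8, otherwise 32-bit ops plus a permute).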
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+ Packet2d tmp, fx;
+ Packet2l emm0;
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half);
+ fx = pfloor(fx);
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ Packet2d x2 = pmul(x,x);
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+ emm0 = ConvertToPacket2l(fx);
+#ifdef __POWER8_VECTOR__
+ emm0 = vec_add(emm0, p2l_1023);
+ emm0 = vec_sl(emm0, p2ul_52);
+#else
+ _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+ _EIGEN_DECLARE_CONST_Packet4i(20, 20);
+ Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
+ emm04i = vec_add(emm04i, p4i_1023);
+ emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
+ static const Packet16uc perm = {
+ 0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
+ 0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
+#ifdef _BIG_ENDIAN
+ emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
+#else
+ emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
+#endif
+#endif
+ Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
+ return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
+ isnumber_mask);
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/AltiVec/MathFunctions.h"
+// #include "src/Core/arch/AltiVec/Complex.h"
+#ifndef EIGEN_COMPLEX32_ALTIVEC_H
+#define EIGEN_COMPLEX32_ALTIVEC_H
+namespace Eigen {
+namespace internal {
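+// XOR masks that flip the sign bit of the imaginary lanes; pconj below is a
+// single vec_xor with one of these.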
+static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);
+#ifdef __VSX__
+#if defined(_BIG_ENDIAN)
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);
+#else
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);
+#endif
+#endif
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+ Packet4f v;
+};
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+#ifdef __VSX__
+ HasBlend = 1,
+#endif
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ Packet2cf res;
+ if((std::ptrdiff_t(&from) % 16) == 0)
+ res.v = pload<Packet4f>((const float *)&from);
+ else
+ res.v = ploadu<Packet4f>((const float *)&from);
+ res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { return Packet2cf(pload<Packet4f>((const float *) from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+ std::complex<float> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2cf>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+ std::complex<float> EIGEN_ALIGN16 af[2];
+ pstore<std::complex<float> >((std::complex<float> *) af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
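+// Complex multiply: v1 broadcasts the real parts of a, v2 the imaginary parts;
+// re(a)*b plus the sign-flipped, lane-swapped im(a)*b yields
+// (ar*br - ai*bi, ar*bi + ai*br) per complex.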
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet4f v1, v2;
+ v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
+ v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
+ v1 = vec_madd(v1, b.v, p4f_ZERO);
+ v2 = vec_madd(v2, b.v, p4f_ZERO);
+ v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
+ v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
+ return Packet2cf(padd<Packet4f>(v1, v2));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_PPC_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> EIGEN_ALIGN16 res[2];
+ pstore((float *)&res, a.v);
+ return res[0];
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+ Packet4f rev_a;
+ rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
+ return Packet2cf(rev_a);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ Packet4f b;
+ b = vec_sld(a.v, a.v, 8);
+ b = padd<Packet4f>(a.v, b);
+ return pfirst<Packet2cf>(Packet2cf(b));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ Packet4f b1, b2;
+#ifdef _BIG_ENDIAN
+ b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
+ b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
+#else
+ b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
+ b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
+#endif
+ b2 = vec_sld(b2, b2, 8);
+ b2 = padd<Packet4f>(b1, b2);
+ return Packet2cf(b2);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ Packet4f b;
+ Packet2cf prod;
+ b = vec_sld(a.v, a.v, 8);
+ prod = pmul<Packet2cf>(a, Packet2cf(b));
+ return pfirst<Packet2cf>(prod);
+}
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+#ifdef _BIG_ENDIAN
+ first.v = vec_sld(first.v, second.v, 8);
+#else
+ first.v = vec_sld(second.v, first.v, 8);
+#endif
+ }
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+template<> struct conj_helper<Packet4f, Packet2cf, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
+ { return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
+};
+template<> struct conj_helper<Packet2cf, Packet4f, false,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
+ { return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
+};
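+// Complex division as a * conj(b) / |b|^2: the conjugating helper computes
+// the numerator, and s + perm(s) replicates br^2 + bi^2 into both lanes of
+// each complex element before the element-wise divide.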
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
+ Packet4f s = pmul<Packet4f>(b.v, b.v);
+ return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
+{
+ return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
+}
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+{
+ Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+#ifdef __VSX__
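+// Each complex<float> pair is 64 bits wide, so the 2-element selector can be
+// applied by reinterpreting the packet as Packet2d and reusing pblend<Packet2d>.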
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ Packet2cf result;
+ result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
+ return result;
+}
+#endif
+#ifdef __VSX__
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet1cd>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
+{
+ std::complex<double> EIGEN_ALIGN16 af[2];
+ pstore<std::complex<double> >(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
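+// Same scheme as the Packet2cf multiply: splat the real and imaginary parts
+// of a, multiply each by b, then swap and sign-flip the second product so the
+// final add forms the complex product.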
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d a_re, a_im, v1, v2;
+ a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+ a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+ v1 = vec_madd(a_re, b.v, p2d_ZERO);
+ v2 = vec_madd(a_im, b.v, p2d_ZERO);
+ v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
+ v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
+ return Packet1cd(padd<Packet2d>(v1, v2));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_PPC_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res[2];
+ pstore<std::complex<double> >(res, a);
+ return res[0];
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+template<> struct conj_helper<Packet2d, Packet1cd, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
+ { return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
+};
+template<> struct conj_helper<Packet1cd, Packet2d, false,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
+ { return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
+};
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = pmul<Packet2d>(b.v, b.v);
+ return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
+}
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/AltiVec/Complex.h"
+#elif defined EIGEN_VECTORIZE_NEON
+// #include "src/Core/arch/NEON/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_NEON_H
+#define EIGEN_PACKET_MATH_NEON_H
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#if EIGEN_ARCH_ARM64
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
+#else
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
+#endif
+#endif
+typedef float32x2_t Packet2f;
+typedef float32x4_t Packet4f;
+typedef int32x4_t Packet4i;
+typedef int32x2_t Packet2i;
+typedef uint32x4_t Packet4ui;
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+ const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+ const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#if EIGEN_ARCH_ARM64
+ #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
+#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+ #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#elif defined __pld
+ #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
+#elif EIGEN_ARCH_ARM32
+ #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
+#else
+ #define EIGEN_ARM_PREFETCH(ADDR)
+#endif
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef Packet4f type;
+ typedef Packet4f half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 4,
+ HasHalfPacket=0,
+ HasDiv = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 1,
+ HasSqrt = 0
+ };
+};
+template<> struct packet_traits<int32_t> : default_packet_traits
+{
+ typedef Packet4i type;
+ typedef Packet4i half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket=0
+ };
+};
+#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
+EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
+EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
+EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
+EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
+EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
+#endif
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); }
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
+{
+ const float f[] = {0, 1, 2, 3};
+ Packet4f countdown = vld1q_f32(f);
+ return vaddq_f32(pset1<Packet4f>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a)
+{
+ const int32_t i[] = {0, 1, 2, 3};
+ Packet4i countdown = vld1q_s32(i);
+ return vaddq_s32(pset1<Packet4i>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+#if EIGEN_ARCH_ARM64
+ return vdivq_f32(a,b);
+#else
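+  // ARMv7 has no vector divide: take the vrecpeq_f32 reciprocal estimate,
+  // refine it with one Newton-Raphson step (vrecpsq_f32 computes 2 - b*inv),
+  // and multiply by a. Precision is limited by the single refinement step.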
+ Packet4f inv, restep, div;
+ inv = vrecpeq_f32(b);
+ restep = vrecpsq_f32(b, inv);
+ inv = vmulq_f32(restep, inv);
+ div = vmulq_f32(a, inv);
+ return div;
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& , const Packet4i& )
+{ eigen_assert(false && "packet integer division are not supported by NEON");
+ return pset1<Packet4i>(0);
+}
+#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
+#else
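+// No single-instruction FMA available: fall back to vmla. On 32-bit ARM with
+// clang the multiply-accumulate is written as inline asm, apparently to keep
+// the compiler from splitting it into separate multiply and add instructions.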
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
+#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
+ Packet4f r = c;
+ asm volatile(
+ "vmla.f32 %q[r], %q[a], %q[b]"
+ : [r] "+w" (r)
+ : [a] "w" (a),
+ [b] "w" (b)
+ : );
+ return r;
+#else
+ return vmlaq_f32(c,a,b);
+#endif
+}
+#endif
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ float32x2_t lo, hi;
+ lo = vld1_dup_f32(from);
+ hi = vld1_dup_f32(from+1);
+ return vcombine_f32(lo, hi);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from)
+{
+ int32x2_t lo, hi;
+ lo = vld1_dup_s32(from);
+ hi = vld1_dup_s32(from+1);
+ return vcombine_s32(lo, hi);
+}
+template<> EIGEN_STRONG_INLINE void pstore<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ Packet4f res = pset1<Packet4f>(0.f);
+ res = vsetq_lane_f32(from[0*stride], res, 0);
+ res = vsetq_lane_f32(from[1*stride], res, 1);
+ res = vsetq_lane_f32(from[2*stride], res, 2);
+ res = vsetq_lane_f32(from[3*stride], res, 3);
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride)
+{
+ Packet4i res = pset1<Packet4i>(0);
+ res = vsetq_lane_s32(from[0*stride], res, 0);
+ res = vsetq_lane_s32(from[1*stride], res, 1);
+ res = vsetq_lane_s32(from[2*stride], res, 2);
+ res = vsetq_lane_s32(from[3*stride], res, 3);
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ to[stride*0] = vgetq_lane_f32(from, 0);
+ to[stride*1] = vgetq_lane_f32(from, 1);
+ to[stride*2] = vgetq_lane_f32(from, 2);
+ to[stride*3] = vgetq_lane_f32(from, 3);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride)
+{
+ to[stride*0] = vgetq_lane_s32(from, 0);
+ to[stride*1] = vgetq_lane_s32(from, 1);
+ to[stride*2] = vgetq_lane_s32(from, 2);
+ to[stride*3] = vgetq_lane_s32(from, 3);
+}
+template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
+ float32x2_t a_lo, a_hi;
+ Packet4f a_r64;
+ a_r64 = vrev64q_f32(a);
+ a_lo = vget_low_f32(a_r64);
+ a_hi = vget_high_f32(a_r64);
+ return vcombine_f32(a_hi, a_lo);
+}
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
+ int32x2_t a_lo, a_hi;
+ Packet4i a_r64;
+ a_r64 = vrev64q_s32(a);
+ a_lo = vget_low_s32(a_r64);
+ a_hi = vget_high_s32(a_r64);
+ return vcombine_s32(a_hi, a_lo);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
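+// Horizontal reductions fold the 64-bit halves of the register with pairwise
+// NEON ops (vpadd/vpmin/vpmax), so each full reduction takes two folding steps.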
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, sum;
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ sum = vpadd_f32(a_lo, a_hi);
+ sum = vpadd_f32(sum, sum);
+ return vget_lane_f32(sum, 0);
+}
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ float32x4x2_t vtrn1, vtrn2, res1, res2;
+ Packet4f sum1, sum2, sum;
+ vtrn1 = vzipq_f32(vecs[0], vecs[2]);
+ vtrn2 = vzipq_f32(vecs[1], vecs[3]);
+ res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
+ res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
+ sum1 = vaddq_f32(res1.val[0], res1.val[1]);
+ sum2 = vaddq_f32(res2.val[0], res2.val[1]);
+ sum = vaddq_f32(sum1, sum2);
+ return sum;
+}
+template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, sum;
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ sum = vpadd_s32(a_lo, a_hi);
+ sum = vpadd_s32(sum, sum);
+ return vget_lane_s32(sum, 0);
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ int32x4x2_t vtrn1, vtrn2, res1, res2;
+ Packet4i sum1, sum2, sum;
+ vtrn1 = vzipq_s32(vecs[0], vecs[2]);
+ vtrn2 = vzipq_s32(vecs[1], vecs[3]);
+ res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
+ res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
+ sum1 = vaddq_s32(res1.val[0], res1.val[1]);
+ sum2 = vaddq_s32(res2.val[0], res2.val[1]);
+ sum = vaddq_s32(sum1, sum2);
+ return sum;
+}
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, prod;
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ prod = vmul_f32(a_lo, a_hi);
+ prod = vmul_f32(prod, vrev64_f32(prod));
+ return vget_lane_f32(prod, 0);
+}
+template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, prod;
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ prod = vmul_s32(a_lo, a_hi);
+ prod = vmul_s32(prod, vrev64_s32(prod));
+ return vget_lane_s32(prod, 0);
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, min;
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ min = vpmin_f32(a_lo, a_hi);
+ min = vpmin_f32(min, min);
+ return vget_lane_f32(min, 0);
+}
+template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, min;
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ min = vpmin_s32(a_lo, a_hi);
+ min = vpmin_s32(min, min);
+ return vget_lane_s32(min, 0);
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ float32x2_t a_lo, a_hi, max;
+ a_lo = vget_low_f32(a);
+ a_hi = vget_high_f32(a);
+ max = vpmax_f32(a_lo, a_hi);
+ max = vpmax_f32(max, max);
+ return vget_lane_f32(max, 0);
+}
+template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
+{
+ int32x2_t a_lo, a_hi, max;
+ a_lo = vget_low_s32(a);
+ a_hi = vget_high_s32(a);
+ max = vpmax_s32(a_lo, a_hi);
+ max = vpmax_s32(max, max);
+ return vget_lane_s32(max, 0);
+}
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+ {\
+ if (Offset!=0)\
+ first = Command(first, second, Offset);\
+ }\
+};\
+
+PALIGN_NEON(0,Packet4f,vextq_f32)
+PALIGN_NEON(1,Packet4f,vextq_f32)
+PALIGN_NEON(2,Packet4f,vextq_f32)
+PALIGN_NEON(3,Packet4f,vextq_f32)
+PALIGN_NEON(0,Packet4i,vextq_s32)
+PALIGN_NEON(1,Packet4i,vextq_s32)
+PALIGN_NEON(2,Packet4i,vextq_s32)
+PALIGN_NEON(3,Packet4i,vextq_s32)
+#undef PALIGN_NEON
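+// 4x4 transpose in two interleave stages: vzipq interleaves row pairs, then
+// the low/high halves are recombined into the transposed rows.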
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
+ float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
+ kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
+ kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
+ kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
+ kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
+ int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
+ kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
+ kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
+ kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
+ kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
+}
+#ifdef __apple_build_version__
+#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
+#else
+#define EIGEN_APPLE_DOUBLE_NEON_BUG 0
+#endif
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+template <typename T>
+uint64x2_t vreinterpretq_u64_f64(T a)
+{
+ return (uint64x2_t) a;
+}
+template <typename T>
+float64x2_t vreinterpretq_f64_u64(T a)
+{
+ return (float64x2_t) a;
+}
+typedef float64x2_t Packet2d;
+typedef float64x1_t Packet1d;
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef Packet2d type;
+ typedef Packet2d half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket=0,
+ HasDiv = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 0,
+ HasExp = 0,
+ HasSqrt = 0
+ };
+};
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
+{
+ const double countdown_raw[] = {0.0,1.0};
+ const Packet2d countdown = vld1q_f64(countdown_raw);
+ return vaddq_f64(pset1<Packet2d>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }
+#ifdef __ARM_FEATURE_FMA
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); }
+#else
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vmlaq_f64(c,a,b); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ return vld1q_dup_f64(from);
+}
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); }
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ Packet2d res = pset1<Packet2d>(0.0);
+ res = vsetq_lane_f64(from[0*stride], res, 0);
+ res = vsetq_lane_f64(from[1*stride], res, 1);
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ to[stride*0] = vgetq_lane_f64(from, 0);
+ to[stride*1] = vgetq_lane_f64(from, 1);
+}
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }
+#endif
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ float64x2_t trn1, trn2;
+ trn1 = vzip1q_f64(vecs[0], vecs[1]);
+ trn2 = vzip2q_f64(vecs[0], vecs[1]);
+ return vaddq_f64(trn1, trn2);
+}
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }
+#endif
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); }
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); }
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+ EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+ {\
+ if (Offset!=0)\
+ first = Command(first, second, Offset);\
+ }\
+};\
+PALIGN_NEON(0,Packet2d,vextq_f64)
+PALIGN_NEON(1,Packet2d,vextq_f64)
+#undef PALIGN_NEON
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ float64x2_t trn1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);
+ float64x2_t trn2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);
+ kernel.packet[0] = trn1;
+ kernel.packet[1] = trn2;
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/NEON/PacketMath.h"
+// #include "src/Core/arch/NEON/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_NEON_H
+#define EIGEN_MATH_FUNCTIONS_NEON_H
+namespace Eigen {
+namespace internal {
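+// Cephes-style exp(): clamp x to the representable range, split it as
+// x = n*ln(2) + r, evaluate a degree-5 polynomial approximation of exp(r),
+// and scale by 2^n, rebuilt by shifting (n + 127) into the float exponent bits.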
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+ Packet4f x = _x;
+ Packet4f tmp, fx;
+ _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+ _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+ _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
+ _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+ _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+ x = vminq_f32(x, p4f_exp_hi);
+ x = vmaxq_f32(x, p4f_exp_lo);
+ fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF);
+ tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
+ Packet4ui mask = vcgtq_f32(tmp, fx);
+ mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1));
+ fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
+ tmp = vmulq_f32(fx, p4f_cephes_exp_C1);
+ Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2);
+ x = vsubq_f32(x, tmp);
+ x = vsubq_f32(x, z);
+ Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x);
+ z = vmulq_f32(x, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p1);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p2);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p3);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p4);
+ y = vmulq_f32(y, x);
+ y = vaddq_f32(y, p4f_cephes_exp_p5);
+ y = vmulq_f32(y, z);
+ y = vaddq_f32(y, x);
+ y = vaddq_f32(y, p4f_1);
+ int32x4_t mm;
+ mm = vcvtq_s32_f32(fx);
+ mm = vaddq_s32(mm, p4i_0x7f);
+ mm = vshlq_n_s32(mm, 23);
+ Packet4f pow2n = vreinterpretq_f32_s32(mm);
+ y = vmulq_f32(y, pow2n);
+ return y;
+}
+}
+}
+#endif
+// end #include "src/Core/arch/NEON/MathFunctions.h"
+// #include "src/Core/arch/NEON/Complex.h"
+#ifndef EIGEN_COMPLEX_NEON_H
+#define EIGEN_COMPLEX_NEON_H
+namespace Eigen {
+namespace internal {
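+// Bit mask that flips the sign of the imaginary lanes (used by pconj/pmul).
+// Clang gets a vector-literal initializer; the generic path loads the mask
+// from a static table instead.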
+inline uint32x4_t p4ui_CONJ_XOR() {
+#if EIGEN_COMP_CLANG
+ uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+ return ret;
+#else
+ static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+ return vld1q_u32( conj_XOR_DATA );
+#endif
+}
+inline uint32x2_t p2ui_CONJ_XOR() {
+ static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
+ return vld1_u32( conj_XOR_DATA );
+}
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+ Packet4f v;
+};
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ float32x2_t r64;
+ r64 = vld1_f32((float *)&from);
+ return Packet2cf(vcombine_f32(r64, r64));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+ Packet4ui b = vreinterpretq_u32_f32(a.v);
+ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet4f v1, v2;
+ v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
+ v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
+ v1 = vmulq_f32(v1, b.v);
+ v2 = vmulq_f32(v2, b.v);
+ v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
+ v2 = vrev64q_f32(v2);
+ return Packet2cf(vaddq_f32(v1, v2));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+ Packet4f res = pset1<Packet4f>(0.f);
+ res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
+ res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
+ res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
+ res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
+ return Packet2cf(res);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+ to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
+ to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
+}
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> EIGEN_ALIGN16 x[2];
+ vst1q_f32((float *)x, a.v);
+ return x[0];
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+ float32x2_t a_lo, a_hi;
+ Packet4f a_r128;
+ a_lo = vget_low_f32(a.v);
+ a_hi = vget_high_f32(a.v);
+ a_r128 = vcombine_f32(a_hi, a_lo);
+ return Packet2cf(a_r128);
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
+{
+ return Packet2cf(vrev64q_f32(a.v));
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ float32x2_t a1, a2;
+ std::complex<float> s;
+ a1 = vget_low_f32(a.v);
+ a2 = vget_high_f32(a.v);
+ a2 = vadd_f32(a1, a2);
+ vst1_f32((float *)&s, a2);
+ return s;
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ Packet4f sum1, sum2, sum;
+ sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
+ sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
+ sum = vaddq_f32(sum1, sum2);
+ return Packet2cf(sum);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ float32x2_t a1, a2, v1, v2, prod;
+ std::complex<float> s;
+ a1 = vget_low_f32(a.v);
+ a2 = vget_high_f32(a.v);
+ v1 = vdup_lane_f32(a1, 0);
+ v2 = vdup_lane_f32(a1, 1);
+ v1 = vmul_f32(v1, a2);
+ v2 = vmul_f32(v2, a2);
+ v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
+ v2 = vrev64_f32(v2);
+ prod = vadd_f32(v1, v2);
+ vst1_f32((float *)&s, prod);
+ return s;
+}
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+ first.v = vextq_f32(first.v, second.v, 2);
+ }
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
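+// Complex division as a * conj(b) / |b|^2: s holds the (br^2, bi^2) pairs and
+// s + rev_s replicates |b|^2 into both lanes of each complex element.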
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+ Packet4f s, rev_s;
+ s = vmulq_f32(b.v, b.v);
+ rev_s = vrev64q_f32(s);
+ return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+ Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
+ kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
+ kernel.packet[1].v = tmp;
+}
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+#if EIGEN_COMP_CLANG
+ static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
+#else
+ const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
+ static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
+#endif
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 0,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d v1, v2;
+ v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
+ v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
+ v1 = vmulq_f64(v1, b.v);
+ v2 = vmulq_f64(v2, b.v);
+ v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
+ v2 = preverse<Packet2d>(v2);
+ return Packet1cd(vaddq_f64(v1, v2));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
+{
+ Packet2d res = pset1<Packet2d>(0.0);
+ res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
+ res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
+ return Packet1cd(res);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
+{
+ to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res;
+ pstore<std::complex<double> >(&res, a);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = pmul<Packet2d>(b.v, b.v);
+ Packet2d rev_s = preverse<Packet2d>(s);
+ return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
+}
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
+ kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
+ kernel.packet[1].v = tmp;
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/NEON/Complex.h"
+#elif defined EIGEN_VECTORIZE_ZVECTOR
+// #include "src/Core/arch/ZVector/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_ZVECTOR_H
+#define EIGEN_PACKET_MATH_ZVECTOR_H
+#include <stdint.h>
+namespace Eigen {
+namespace internal {
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
+#endif
+typedef __vector int Packet4i;
+typedef __vector unsigned int Packet4ui;
+typedef __vector __bool int Packet4bi;
+typedef __vector short int Packet8i;
+typedef __vector unsigned char Packet16uc;
+typedef __vector double Packet2d;
+typedef __vector unsigned long long Packet2ul;
+typedef __vector long long Packet2l;
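+// On this target the single-precision packet is emulated: Packet4f is a pair
+// of Packet2d halves, with each float stored widened to a double.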
+typedef struct {
+ Packet2d v4f[2];
+} Packet4f;
+typedef union {
+ int32_t i[4];
+ uint32_t ui[4];
+ int64_t l[2];
+ uint64_t ul[2];
+ double d[2];
+ Packet4i v4i;
+ Packet4ui v4ui;
+ Packet2l v2l;
+ Packet2ul v2ul;
+ Packet2d v2d;
+} Packet;
+#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
+#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
+#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+ Packet4i p4i_##NAME = pset1<Packet4i>(X)
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+ Packet2d p2d_##NAME = pset1<Packet2d>(X)
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+ Packet2l p2l_##NAME = pset1<Packet2l>(X)
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1);
+static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
+static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
+static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
+#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
+inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
+{
+ Packet vt;
+ vt.v4i = v;
+ s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
+{
+ Packet vt;
+ vt.v4ui = v;
+ s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
+{
+ Packet vt;
+ vt.v2l = v;
+ s << vt.l[0] << ", " << vt.l[1];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
+{
+ Packet vt;
+ vt.v2ul = v;
+  s << vt.ul[0] << ", " << vt.ul[1];
+ return s;
+}
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+ Packet vt;
+ vt.v2d = v;
+ s << vt.d[0] << ", " << vt.d[1];
+ return s;
+}
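+// Splat one of the four emulated float lanes: select the element from the
+// appropriate Packet2d half and duplicate it into both halves.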
+template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
+{
+ Packet4f splat;
+ switch (element) {
+ case 0:
+ splat.v4f[0] = vec_splat(from.v4f[0], 0);
+ splat.v4f[1] = splat.v4f[0];
+ break;
+ case 1:
+ splat.v4f[0] = vec_splat(from.v4f[0], 1);
+ splat.v4f[1] = splat.v4f[0];
+ break;
+ case 2:
+ splat.v4f[0] = vec_splat(from.v4f[1], 0);
+ splat.v4f[1] = splat.v4f[0];
+ break;
+ case 3:
+ splat.v4f[0] = vec_splat(from.v4f[1], 1);
+ splat.v4f[1] = splat.v4f[0];
+ break;
+ }
+ return splat;
+}
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+ {
+ switch (Offset % 4) {
+ case 1:
+ first = vec_sld(first, second, 4); break;
+ case 2:
+ first = vec_sld(first, second, 8); break;
+ case 3:
+ first = vec_sld(first, second, 12); break;
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+ static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+ {
+ switch (Offset % 4) {
+ case 1:
+ first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
+ first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
+ break;
+ case 2:
+ first.v4f[0] = first.v4f[1];
+ first.v4f[1] = second.v4f[0];
+ break;
+ case 3:
+ first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
+ first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
+ break;
+ }
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+ {
+ if (Offset == 1)
+ first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
+ }
+};
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet *vfrom;
+ vfrom = (Packet *) from;
+ return vfrom->v4i;
+}
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet4f vfrom;
+ vfrom.v4f[0] = vec_ld2f(&from[0]);
+ vfrom.v4f[1] = vec_ld2f(&from[2]);
+ return vfrom;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
+{
+ EIGEN_DEBUG_ALIGNED_LOAD
+ Packet *vfrom;
+ vfrom = (Packet *) from;
+ return vfrom->v2d;
+}
+template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+ Packet *vto;
+ vto = (Packet *) to;
+ vto->v4i = from;
+}
+template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+ vec_st2f(from.v4f[0], &to[0]);
+ vec_st2f(from.v4f[1], &to[2]);
+}
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
+{
+ EIGEN_DEBUG_ALIGNED_STORE
+ Packet *vto;
+ vto = (Packet *) to;
+ vto->v2d = from;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
+{
+ return vec_splats(from);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
+ return vec_splats(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
+{
+ Packet4f to;
+ to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
+ to.v4f[1] = to.v4f[0];
+ return to;
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+ Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+ a3 = pload<Packet4i>(a);
+ a0 = vec_splat(a3, 0);
+ a1 = vec_splat(a3, 1);
+ a2 = vec_splat(a3, 2);
+ a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+ a3 = pload<Packet4f>(a);
+ a0 = vec_splat_packet4f<0>(a3);
+ a1 = vec_splat_packet4f<1>(a3);
+ a2 = vec_splat_packet4f<2>(a3);
+ a3 = vec_splat_packet4f<3>(a3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+ a1 = pload<Packet2d>(a);
+ a0 = vec_splat(a1, 0);
+ a1 = vec_splat(a1, 1);
+ a3 = pload<Packet2d>(a+2);
+ a2 = vec_splat(a3, 0);
+ a3 = vec_splat(a3, 1);
+}
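+// Strided gather/scatter have no direct vector instruction here, so they are
+// emulated by round-tripping through a 16-byte-aligned scratch array and the
+// aligned pload/pstore above.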
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ ai[0] = from[0*stride];
+ ai[1] = from[1*stride];
+ ai[2] = from[2*stride];
+ ai[3] = from[3*stride];
+ return pload<Packet4i>(ai);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ float EIGEN_ALIGN16 ai[4];
+ ai[0] = from[0*stride];
+ ai[1] = from[1*stride];
+ ai[2] = from[2*stride];
+ ai[3] = from[3*stride];
+ return pload<Packet4f>(ai);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+ int EIGEN_ALIGN16 ai[4];
+ pstore<int>((int *)ai, from);
+ to[0*stride] = ai[0];
+ to[1*stride] = ai[1];
+ to[2*stride] = ai[2];
+ to[3*stride] = ai[3];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+ float EIGEN_ALIGN16 ai[4];
+ pstore<float>((float *)ai, from);
+ to[0*stride] = ai[0];
+ to[1*stride] = ai[1];
+ to[2*stride] = ai[2];
+ to[3*stride] = ai[3];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+ double EIGEN_ALIGN16 af[2];
+ pstore<double>(af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f c;
+ c.v4f[0] = a.v4f[0] + b.v4f[0];
+ c.v4f[1] = a.v4f[1] + b.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f c;
+ c.v4f[0] = a.v4f[0] - b.v4f[0];
+ c.v4f[1] = a.v4f[1] - b.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f c;
+ c.v4f[0] = a.v4f[0] * b.v4f[0];
+ c.v4f[1] = a.v4f[1] * b.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f c;
+ c.v4f[0] = a.v4f[0] / b.v4f[0];
+ c.v4f[1] = a.v4f[1] / b.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+ Packet4f c;
+ c.v4f[0] = -a.v4f[0];
+ c.v4f[1] = -a.v4f[1];
+ return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
+{
+ Packet4f res;
+ res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
+ res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = por(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = por(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = pxor(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = pxor(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ Packet4f res;
+ res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
+ res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
+{
+ Packet4f res;
+ res.v4f[0] = vec_round(a.v4f[0]);
+ res.v4f[1] = vec_round(a.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
+{
+ Packet4f res;
+ res.v4f[0] = vec_ceil(a.v4f[0]);
+ res.v4f[1] = vec_ceil(a.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
+{
+ Packet4f res;
+ res.v4f[0] = vec_floor(a.v4f[0]);
+ res.v4f[1] = vec_floor(a.v4f[1]);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { return pload<Packet4f>(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
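+// ploaddup loads size/2 scalars and duplicates each one in place,
+// e.g. for Packet4f it turns {a, b} into {a, a, b, b}.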
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
+{
+ Packet4i p = pload<Packet4i>(from);
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+ Packet4f p = pload<Packet4f>(from);
+ p.v4f[1] = vec_splat(p.v4f[0], 1);
+ p.v4f[0] = vec_splat(p.v4f[0], 0);
+ return p;
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
+{
+ Packet2d p = pload<Packet2d>(from);
+ return vec_perm(p, p, p16uc_PSET64_HI);
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{
+ return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
+}
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{
+ return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
+}
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{
+ Packet4f rev;
+ rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
+ rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
+ return rev;
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
+{
+ Packet4f res;
+ res.v4f[0] = pabs(a.v4f[0]);
+ res.v4f[1] = pabs(a.v4f[1]);
+ return res;
+}
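+// Horizontal reductions fold the packet onto itself with vec_sld rotations
+// (an 8- then a 4-byte shift for 4-lane packets, a single 8-byte shift for
+// 2-lane ones), combining halves at each step, then read lane 0 via pfirst.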
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, sum;
+ b = vec_sld(a, a, 8);
+ sum = padd<Packet4i>(a, b);
+ b = vec_sld(sum, sum, 4);
+ sum = padd<Packet4i>(sum, b);
+ return pfirst(sum);
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+ Packet2d b, sum;
+ b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
+ sum = padd<Packet2d>(a, b);
+ return pfirst(sum);
+}
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+ Packet2d sum;
+ sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
+ double first = predux<Packet2d>(sum);
+ return static_cast<float>(first);
+}
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+ Packet4i v[4], sum[4];
+ v[0] = vec_mergeh(vecs[0], vecs[2]);
+ v[1] = vec_mergel(vecs[0], vecs[2]);
+ v[2] = vec_mergeh(vecs[1], vecs[3]);
+ v[3] = vec_mergel(vecs[1], vecs[3]);
+ sum[0] = vec_mergeh(v[0], v[2]);
+ sum[1] = vec_mergel(v[0], v[2]);
+ sum[2] = vec_mergeh(v[1], v[3]);
+ sum[3] = vec_mergel(v[1], v[3]);
+ sum[0] = padd<Packet4i>(sum[0], sum[1]);
+ sum[1] = padd<Packet4i>(sum[2], sum[3]);
+ sum[0] = padd<Packet4i>(sum[0], sum[1]);
+ return sum[0];
+}
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+ Packet2d v[2], sum;
+ v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8)));
+ v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8)));
+ sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8));
+ return sum;
+}
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+ PacketBlock<Packet4f,4> transpose;
+ transpose.packet[0] = vecs[0];
+ transpose.packet[1] = vecs[1];
+ transpose.packet[2] = vecs[2];
+ transpose.packet[3] = vecs[3];
+ ptranspose(transpose);
+ Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
+ sum = padd(sum, transpose.packet[2]);
+ sum = padd(sum, transpose.packet[3]);
+ return sum;
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+ EIGEN_ALIGN16 int aux[4];
+ pstore(aux, a);
+ return aux[0] * aux[1] * aux[2] * aux[3];
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+ return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = pmin<Packet4i>(a, vec_sld(a, a, 8));
+ res = pmin<Packet4i>(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+ Packet2d b, res;
+ b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
+ res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
+ return static_cast<float>(pfirst(res));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+ Packet4i b, res;
+ b = pmax<Packet4i>(a, vec_sld(a, a, 8));
+ res = pmax<Packet4i>(b, vec_sld(b, b, 4));
+ return pfirst(res);
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+ return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+ Packet2d b, res;
+ b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
+ res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
+ return static_cast<float>(pfirst(res));
+}
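+// 4x4 transpose via two rounds of mergeh/mergel interleaves: rows are first
+// interleaved 0/2 and 1/3, then the halves are interleaved again to produce
+// the transposed rows. The Packet4f overload below decomposes the problem
+// into four 2x2 Packet2d transposes.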
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+ Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+ Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+ Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+ Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+ kernel.packet[0] = vec_mergeh(t0, t2);
+ kernel.packet[1] = vec_mergel(t0, t2);
+ kernel.packet[2] = vec_mergeh(t1, t3);
+ kernel.packet[3] = vec_mergel(t1, t3);
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+ Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+ Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+ kernel.packet[0] = t0;
+ kernel.packet[1] = t1;
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+ PacketBlock<Packet2d,2> t0,t1,t2,t3;
+ t0.packet[0] = kernel.packet[0].v4f[0];
+ t0.packet[1] = kernel.packet[1].v4f[0];
+ t1.packet[0] = kernel.packet[0].v4f[1];
+ t1.packet[1] = kernel.packet[1].v4f[1];
+ t2.packet[0] = kernel.packet[2].v4f[0];
+ t2.packet[1] = kernel.packet[3].v4f[0];
+ t3.packet[0] = kernel.packet[2].v4f[1];
+ t3.packet[1] = kernel.packet[3].v4f[1];
+ ptranspose(t0);
+ ptranspose(t1);
+ ptranspose(t2);
+ ptranspose(t3);
+ kernel.packet[0].v4f[0] = t0.packet[0];
+ kernel.packet[0].v4f[1] = t2.packet[0];
+ kernel.packet[1].v4f[0] = t0.packet[1];
+ kernel.packet[1].v4f[1] = t2.packet[1];
+ kernel.packet[2].v4f[0] = t1.packet[0];
+ kernel.packet[2].v4f[1] = t3.packet[0];
+ kernel.packet[3].v4f[0] = t1.packet[1];
+ kernel.packet[3].v4f[1] = t3.packet[1];
+}
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+ Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+ Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
+ Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
+ Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
+ Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
+ Packet4f result;
+ result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
+ result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
+ return result;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+ Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
+ Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
+ return vec_sel(elsePacket, thenPacket, mask);
+}
+}
+}
+#endif
+// end #include "src/Core/arch/ZVector/PacketMath.h"
+// #include "src/Core/arch/ZVector/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_ZVECTOR_H
+#define EIGEN_MATH_FUNCTIONS_ZVECTOR_H
+namespace Eigen {
+namespace internal {
+static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
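+// pexp below follows the classic Cephes-style range reduction; a sketch:
+//   n   = round(x * log2(e))                       (fx)
+//   r   = x - n*C1 - n*C2                          (C1 + C2 == ln(2), split
+//                                                   in two for accuracy)
+//   e^r ~= 1 + 2*r*P(r^2) / (Q(r^2) - r*P(r^2))    (rational approximation)
+//   e^x = 2^n * e^r                                (2^n built by adding n to
+//                                                   the biased exponent bits:
+//                                                   (n + 1023) << 52)
+// NaN inputs are passed through unchanged via the isnumber_mask select.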
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+ Packet2d x = _x;
+ Packet2d tmp, fx;
+ Packet2l emm0;
+ x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+ fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+ fx = vec_floor(fx);
+ tmp = pmul(fx, p2d_cephes_exp_C1);
+ Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+ x = psub(x, tmp);
+ x = psub(x, z);
+ Packet2d x2 = pmul(x,x);
+ Packet2d px = p2d_cephes_exp_p0;
+ px = pmadd(px, x2, p2d_cephes_exp_p1);
+ px = pmadd(px, x2, p2d_cephes_exp_p2);
+ px = pmul (px, x);
+ Packet2d qx = p2d_cephes_exp_q0;
+ qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+ qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+ x = pdiv(px,psub(qx,px));
+ x = pmadd(p2d_2,x,p2d_1);
+ emm0 = vec_ctsl(fx, 0);
+ static const Packet2l p2l_1023 = { 1023, 1023 };
+ static const Packet2ul p2ul_52 = { 52, 52 };
+ emm0 = emm0 + p2l_1023;
+ emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
+ Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
+ return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
+ isnumber_mask);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& x)
+{
+ Packet4f res;
+ res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
+ res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
+ return res;
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x)
+{
+ return __builtin_s390_vfsqdb(x);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x)
+{
+ Packet4f res;
+ res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
+ res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
+ return res;
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+ return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+ Packet4f res;
+ res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
+ res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
+ return res;
+}
+}
+}
+#endif
+// end #include "src/Core/arch/ZVector/MathFunctions.h"
+// #include "src/Core/arch/ZVector/Complex.h"
+#ifndef EIGEN_COMPLEX32_ZVECTOR_H
+#define EIGEN_COMPLEX32_ZVECTOR_H
+namespace Eigen {
+namespace internal {
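+// Each CONJ_XOR mask below carries the sign bit (the -0.0 pattern) in exactly
+// one 64-bit lane: XOR2 in the imaginary lane (used by pconj) and XOR1 in the
+// real lane (used inside the complex multiply).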
+static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);
+static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);
+struct Packet1cd
+{
+ EIGEN_STRONG_INLINE Packet1cd() {}
+ EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+ Packet2d v;
+};
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+ union {
+ Packet4f v;
+ Packet1cd cd[2];
+ };
+};
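+// Packet2cf stores two complex<float> values as a Packet4f whose halves are
+// Packet2d: on this target single-precision complex arithmetic is carried out
+// in double precision, one Packet1cd per complex number.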
+template<> struct packet_traits<std::complex<float> > : default_packet_traits
+{
+ typedef Packet2cf type;
+ typedef Packet2cf half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 2,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasBlend = 1,
+ HasSetLinear = 0
+ };
+};
+template<> struct packet_traits<std::complex<double> > : default_packet_traits
+{
+ typedef Packet1cd type;
+ typedef Packet1cd half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = 1,
+ HasHalfPacket = 0,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasSetLinear = 0
+ };
+};
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
+{ return ploadu<Packet1cd>(&from); }
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
+{
+ Packet2cf res;
+ res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
+ res.cd[1] = res.cd[0];
+ return res;
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+ std::complex<float> EIGEN_ALIGN16 af[2];
+ af[0] = from[0*stride];
+ af[1] = from[1*stride];
+ return pload<Packet2cf>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
+{
+ return pload<Packet1cd>(from);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+ std::complex<float> EIGEN_ALIGN16 af[2];
+ pstore<std::complex<float> >((std::complex<float> *) af, from);
+ to[0*stride] = af[0];
+ to[1*stride] = af[1];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
+{
+ pstore<std::complex<double> >(to, from);
+}
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+ Packet2cf res;
+ res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
+ res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
+ return res;
+}
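+// Complex multiply, (ar + i*ai)(br + i*bi) = (ar*br - ai*bi) + i*(ar*bi + ai*br):
+// splat ar and ai across both lanes, form v1 = ar*(br,bi) and v2 = ai*(br,bi),
+// swap v2's halves, flip the sign of its real lane with p2ul_CONJ_XOR1, then add.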
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet2d a_re, a_im, v1, v2;
+ a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+ a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+ v1 = vec_madd(a_re, b.v, p2d_ZERO);
+ v2 = vec_madd(a_im, b.v, p2d_ZERO);
+ v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
+ v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
+ return Packet1cd(v1 + v2);
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res;
+ res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
+ res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
+{
+ std::complex<double> EIGEN_ALIGN16 res;
+ pstore<std::complex<double> >(&res, a);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> EIGEN_ALIGN16 res[2];
+ pstore<std::complex<float> >(res, a);
+ return res[0];
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+ Packet2cf res;
+ res.cd[0] = a.cd[1];
+ res.cd[1] = a.cd[0];
+ return res;
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> res;
+ Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
+ vec_st2f(b.v, (float*)&res);
+ return res;
+}
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+ return vecs[0];
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ PacketBlock<Packet2cf,2> transpose;
+ transpose.packet[0] = vecs[0];
+ transpose.packet[1] = vecs[1];
+ ptranspose(transpose);
+ return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
+}
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+ return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> res;
+ Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
+ vec_st2f(b.v, (float*)&res);
+ return res;
+}
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+ static EIGEN_STRONG_INLINE void run(Packet1cd& , const Packet1cd& )
+ {
+ }
+};
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+ static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset == 1) {
+ first.cd[0] = first.cd[1];
+ first.cd[1] = second.cd[0];
+ }
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+ EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(a, pconj(b));
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return internal::pmul(pconj(a), b);
+ }
+};
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+ EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+ { return padd(pmul(x,y),c); }
+ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+ {
+ return pconj(internal::pmul(a, b));
+ }
+};
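+// Complex division via the conjugate: a/b = a*conj(b) / |b|^2, where the
+// denominator br^2 + bi^2 is built as s + swap(s) with s holding the
+// lane-wise squares of b.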
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+ Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+ Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
+ return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ Packet2cf res;
+ res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
+ res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
+ return res;
+}
+EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
+{
+ return Packet1cd(preverse(Packet2d(x.v)));
+}
+EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
+{
+ Packet2cf res;
+ res.cd[0] = pcplxflip(x.cd[0]);
+ res.cd[1] = pcplxflip(x.cd[1]);
+ return res;
+}
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+ Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+ kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+ kernel.packet[0].v = tmp;
+}
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+{
+ Packet1cd tmp = kernel.packet[0].cd[1];
+ kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
+ kernel.packet[1].cd[0] = tmp;
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+ Packet2cf result;
+ const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
+ result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
+ return result;
+}
+}
+}
+#endif
+// end #include "src/Core/arch/ZVector/Complex.h"
+#endif
+#if defined EIGEN_VECTORIZE_CUDA
+// #include "src/Core/arch/CUDA/PacketMath.h"
+#ifndef EIGEN_PACKET_MATH_CUDA_H
+#define EIGEN_PACKET_MATH_CUDA_H
+namespace Eigen {
+namespace internal {
+#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+template<> struct is_arithmetic<float4> { enum { value = true }; };
+template<> struct is_arithmetic<double2> { enum { value = true }; };
+template<> struct packet_traits<float> : default_packet_traits
+{
+ typedef float4 type;
+ typedef float4 half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=4,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasLGamma = 1,
+ HasDiGamma = 1,
+ HasZeta = 1,
+ HasPolygamma = 1,
+ HasErf = 1,
+ HasErfc = 1,
+ HasIGamma = 1,
+ HasIGammac = 1,
+ HasBetaInc = 1,
+ HasBlend = 0,
+ };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+ typedef double2 type;
+ typedef double2 half;
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size=2,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasLGamma = 1,
+ HasDiGamma = 1,
+ HasZeta = 1,
+ HasPolygamma = 1,
+ HasErf = 1,
+ HasErfc = 1,
+ HasIGamma = 1,
+ HasIGammac = 1,
+ HasBetaInc = 1,
+ HasBlend = 0,
+ };
+};
+template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
+template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
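+// On CUDA the packet types are the built-in float4/double2 vector structs and
+// every primitive below is plain per-lane scalar code: each GPU thread
+// operates on its own packet, so there are no cross-lane SIMD instructions
+// to exploit.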
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
+ return make_float4(from, from, from, from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
+ return make_double2(from, from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
+ return make_float4(a, a+1, a+2, a+3);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
+ return make_double2(a, a+1);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x+b.x, a.y+b.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x-b.x, a.y-b.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
+ return make_float4(-a.x, -a.y, -a.z, -a.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
+ return make_double2(-a.x, -a.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x*b.x, a.y*b.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x/b.x, a.y/b.y);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
+ return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
+ return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
+ return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
+ return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
+ return *reinterpret_cast<const float4*>(from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
+ return *reinterpret_cast<const double2*>(from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
+ return make_float4(from[0], from[1], from[2], from[3]);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
+ return make_double2(from[0], from[1]);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
+ return make_float4(from[0], from[0], from[1], from[1]);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
+ return make_double2(from[0], from[0]);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
+ *reinterpret_cast<float4*>(to) = from;
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
+ *reinterpret_cast<double2*>(to) = from;
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
+ to[0] = from.x;
+ to[1] = from.y;
+ to[2] = from.z;
+ to[3] = from.w;
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
+ to[0] = from.x;
+ to[1] = from.y;
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+ return __ldg((const float4*)from);
+#else
+ return make_float4(from[0], from[1], from[2], from[3]);
+#endif
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+ return __ldg((const double2*)from);
+#else
+ return make_double2(from[0], from[1]);
+#endif
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+ return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
+#else
+ return make_float4(from[0], from[1], from[2], from[3]);
+#endif
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+ return make_double2(__ldg(from+0), __ldg(from+1));
+#else
+ return make_double2(from[0], from[1]);
+#endif
+}
+template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
+ return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
+ return make_double2(from[0*stride], from[1*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
+ to[stride*0] = from.x;
+ to[stride*1] = from.y;
+ to[stride*2] = from.z;
+ to[stride*3] = from.w;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
+ to[stride*0] = from.x;
+ to[stride*1] = from.y;
+}
+template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
+ return a.x;
+}
+template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
+ return a.x;
+}
+template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
+ return a.x + a.y + a.z + a.w;
+}
+template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
+ return a.x + a.y;
+}
+template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
+ return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
+ return fmax(a.x, a.y);
+}
+template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
+ return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
+ return fmin(a.x, a.y);
+}
+template<> EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
+ return a.x * a.y * a.z * a.w;
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
+ return a.x * a.y;
+}
+template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
+ return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
+ return make_double2(fabs(a.x), fabs(a.y));
+}
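+// In-register 4x4 (resp. 2x2) transpose: swap each off-diagonal pair, i.e.
+// component j of kernel.packet[i] with component i of kernel.packet[j].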
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<float4,4>& kernel) {
+ float tmp = kernel.packet[0].y;
+ kernel.packet[0].y = kernel.packet[1].x;
+ kernel.packet[1].x = tmp;
+ tmp = kernel.packet[0].z;
+ kernel.packet[0].z = kernel.packet[2].x;
+ kernel.packet[2].x = tmp;
+ tmp = kernel.packet[0].w;
+ kernel.packet[0].w = kernel.packet[3].x;
+ kernel.packet[3].x = tmp;
+ tmp = kernel.packet[1].z;
+ kernel.packet[1].z = kernel.packet[2].y;
+ kernel.packet[2].y = tmp;
+ tmp = kernel.packet[1].w;
+ kernel.packet[1].w = kernel.packet[3].y;
+ kernel.packet[3].y = tmp;
+ tmp = kernel.packet[2].w;
+ kernel.packet[2].w = kernel.packet[3].z;
+ kernel.packet[3].z = tmp;
+}
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<double2,2>& kernel) {
+ double tmp = kernel.packet[0].y;
+ kernel.packet[0].y = kernel.packet[1].x;
+ kernel.packet[1].x = tmp;
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/CUDA/PacketMath.h"
+// #include "src/Core/arch/CUDA/MathFunctions.h"
+#ifndef EIGEN_MATH_FUNCTIONS_CUDA_H
+#define EIGEN_MATH_FUNCTIONS_CUDA_H
+namespace Eigen {
+namespace internal {
+#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plog<float4>(const float4& a)
+{
+ return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plog<double2>(const double2& a)
+{
+ using ::log;
+ return make_double2(log(a.x), log(a.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plog1p<float4>(const float4& a)
+{
+ return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plog1p<double2>(const double2& a)
+{
+ return make_double2(log1p(a.x), log1p(a.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pexp<float4>(const float4& a)
+{
+ return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pexp<double2>(const double2& a)
+{
+ using ::exp;
+ return make_double2(exp(a.x), exp(a.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 psqrt<float4>(const float4& a)
+{
+ return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 psqrt<double2>(const double2& a)
+{
+ using ::sqrt;
+ return make_double2(sqrt(a.x), sqrt(a.y));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 prsqrt<float4>(const float4& a)
+{
+ return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 prsqrt<double2>(const double2& a)
+{
+ return make_double2(rsqrt(a.x), rsqrt(a.y));
+}
+#endif
+}
+}
+#endif
+// end #include "src/Core/arch/CUDA/MathFunctions.h"
+#endif
+// #include "src/Core/arch/Default/Settings.h"
+#ifndef EIGEN_DEFAULT_SETTINGS_H
+#define EIGEN_DEFAULT_SETTINGS_H
+#ifndef EIGEN_UNROLLING_LIMIT
+#define EIGEN_UNROLLING_LIMIT 100
+#endif
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
+#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
+#endif
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
+#endif
+#endif
+// end #include "src/Core/arch/Default/Settings.h"
+// #include "src/Core/functors/BinaryFunctors.h"
+#ifndef EIGEN_BINARY_FUNCTORS_H
+#define EIGEN_BINARY_FUNCTORS_H
+namespace Eigen {
+namespace internal {
+template<typename Arg1, typename Arg2>
+struct binary_op_base
+{
+ typedef Arg1 first_argument_type;
+ typedef Arg2 second_argument_type;
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_sum_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+#else
+ scalar_sum_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::padd(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
+ };
+};
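+// Usage sketch (hypothetical call site, not part of this header): an
+// expression evaluator applies the scalar path as
+//   scalar_sum_op<float,float>()(a, b)
+// and, when functor_traits<...>::PacketAccess is set, the packet path as
+//   op.packetOp(lhsPacket, rhsPacket)
+// with predux() folding a packet accumulator back into a scalar result.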
+template<> struct scalar_sum_op<bool,bool> : scalar_sum_op<int,int> {
+ EIGEN_DEPRECATED
+ scalar_sum_op() {}
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_product_op : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+#else
+ scalar_product_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmul(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux_mul(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2,
+ PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
+ };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_conj_product_op : binary_op_base<LhsScalar,RhsScalar>
+{
+ enum {
+ Conj = NumTraits<LhsScalar>::IsComplex
+ };
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
+ { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = NumTraits<LhsScalar>::MulCost,
+ PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
+ };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmin(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux_min(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_min_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMin
+ };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_max_op : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pmax(a,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+ { return internal::predux_max(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_max_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMax
+ };
+};
+template<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_op;
+template<typename LhsScalar, typename RhsScalar, ComparisonName cmp>
+struct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = false
+ };
+};
+template<ComparisonName Cmp, typename LhsScalar, typename RhsScalar>
+struct result_of<scalar_cmp_op<LhsScalar, RhsScalar, Cmp>(LhsScalar,RhsScalar)> {
+ typedef bool type;
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}
+};
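+// cmp_UNORD is the IEEE-754 "unordered" predicate: !(a<=b || b<=a) is true
+// exactly when at least one operand is NaN, since every ordered comparison
+// involving a NaN evaluates to false.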
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef bool result_type;
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}
+};
+template<typename Scalar>
+struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
+{
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+ {
+ EIGEN_USING_STD_MATH(sqrt)
+ Scalar p, qp;
+ if(_x>_y)
+ {
+ p = _x;
+ qp = _y / p;
+ }
+ else
+ {
+ p = _y;
+ qp = _x / p;
+ }
+ return p * sqrt(Scalar(1) + qp*qp);
+ }
+};
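+// The branch above factors out the larger operand p so that the square root
+// is taken of a value close to 1: hypot(x,y) = p*sqrt(1 + (q/p)^2) with
+// |q/p| <= 1. This avoids the intermediate overflow of the naive
+// sqrt(x*x + y*y); e.g. for double x = y = 1e200, x*x overflows to +inf,
+// while the scaled form correctly yields 1e200*sqrt(2).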
+template<typename Scalar>
+struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
+ enum
+ {
+ Cost = 3 * NumTraits<Scalar>::AddCost +
+ 2 * NumTraits<Scalar>::MulCost +
+ 2 * scalar_div_cost<Scalar,false>::value,
+ PacketAccess = false
+ };
+};
+template<typename Scalar, typename Exponent>
+struct scalar_pow_op : binary_op_base<Scalar,Exponent>
+{
+ typedef typename ScalarBinaryOpTraits<Scalar,Exponent,scalar_pow_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op)
+#else
+ scalar_pow_op() {
+ typedef Scalar LhsScalar;
+ typedef Exponent RhsScalar;
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC
+ inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); }
+};
+template<typename Scalar, typename Exponent>
+struct functor_traits<scalar_pow_op<Scalar,Exponent> > {
+ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_difference_op : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_difference_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+#else
+ scalar_difference_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::psub(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {
+ enum {
+ Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+ PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub
+ };
+};
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_quotient_op : binary_op_base<LhsScalar,RhsScalar>
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_quotient_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+#else
+ scalar_quotient_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+ { return internal::pdiv(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
+ typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
+ enum {
+ PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
+ Cost = scalar_div_cost<result_type,PacketAccess>::value
+ };
+};
+struct scalar_boolean_and_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+};
+template<> struct functor_traits<scalar_boolean_and_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+struct scalar_boolean_or_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+};
+template<> struct functor_traits<scalar_boolean_or_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+struct scalar_boolean_xor_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
+};
+template<> struct functor_traits<scalar_boolean_xor_op> {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+template<typename BinaryOp> struct bind1st_op : BinaryOp {
+ typedef typename BinaryOp::first_argument_type first_argument_type;
+ typedef typename BinaryOp::second_argument_type second_argument_type;
+ typedef typename BinaryOp::result_type result_type;
+ bind1st_op(const first_argument_type &val) : m_value(val) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const
+ { return BinaryOp::packetOp(internal::pset1<Packet>(m_value), b); }
+ first_argument_type m_value;
+};
+template<typename BinaryOp> struct functor_traits<bind1st_op<BinaryOp> > : functor_traits<BinaryOp> {};
+template<typename BinaryOp> struct bind2nd_op : BinaryOp {
+ typedef typename BinaryOp::first_argument_type first_argument_type;
+ typedef typename BinaryOp::second_argument_type second_argument_type;
+ typedef typename BinaryOp::result_type result_type;
+ bind2nd_op(const second_argument_type &val) : m_value(val) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return BinaryOp::packetOp(a,internal::pset1<Packet>(m_value)); }
+ second_argument_type m_value;
+};
+template<typename BinaryOp> struct functor_traits<bind2nd_op<BinaryOp> > : functor_traits<BinaryOp> {};
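+// bind1st_op / bind2nd_op adapt a binary functor into a unary one by pinning
+// its first or second argument to a stored value; the packet path broadcasts
+// that value with pset1. Eigen builds such functors internally for
+// scalar-vs-expression operations. A hypothetical standalone use, for
+// illustration only:
+//   internal::bind2nd_op<internal::scalar_product_op<float,float> > times2(2.0f);
+//   float six = times2(3.0f); // computes 3.0f * 2.0f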
+}
+}
+#endif
+// end #include "src/Core/functors/BinaryFunctors.h"
+// #include "src/Core/functors/UnaryFunctors.h"
+#ifndef EIGEN_UNARY_FUNCTORS_H
+#define EIGEN_UNARY_FUNCTORS_H
+namespace Eigen {
+namespace internal {
+template<typename Scalar> struct scalar_opposite_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pnegate(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_opposite_op<Scalar> >
+{ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasNegate };
+};
+template<typename Scalar> struct scalar_abs_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pabs(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasAbs
+ };
+};
+template<typename Scalar> struct scalar_score_coeff_op : scalar_abs_op<Scalar>
+{
+ typedef void Score_is_abs;
+};
+template<typename Scalar>
+struct functor_traits<scalar_score_coeff_op<Scalar> > : functor_traits<scalar_abs_op<Scalar> > {};
+template<typename Scalar, typename=void> struct abs_knowing_score
+{
+ EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ template<typename Score>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); }
+};
+template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>
+{
+ EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ template<typename Scal>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; }
+};
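+// abs_knowing_score lets pivot-selection code reuse an already-computed
+// score: the generic overload recomputes numext::abs(a), while the
+// specialization above is selected via the Score_is_abs tag typedef when the
+// score already *is* the absolute value, and simply returns it.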
+template<typename Scalar> struct scalar_abs2_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs2_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
+template<typename Scalar> struct scalar_conjugate_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_conjugate_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
+ PacketAccess = packet_traits<Scalar>::HasConj
+ };
+};
+template<typename Scalar> struct scalar_arg_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+ { return internal::parg(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_arg_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::IsComplex ? 5 * NumTraits<Scalar>::MulCost : NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasArg
+ };
+};
+template<typename Scalar, typename NewType>
+struct scalar_cast_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+ typedef NewType result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
+};
+template<typename Scalar, typename NewType>
+struct functor_traits<scalar_cast_op<Scalar,NewType> >
+{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
+template<typename Scalar>
+struct scalar_real_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+template<typename Scalar>
+struct scalar_imag_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+template<typename Scalar>
+struct scalar_real_ref_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+template<typename Scalar>
+struct scalar_imag_ref_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
+ typedef typename NumTraits<Scalar>::Real result_type;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+template<typename Scalar> struct scalar_exp_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_exp_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasExp,
+#ifdef EIGEN_VECTORIZE_FMA
+ Cost =
+ (sizeof(Scalar) == 4
+ ? (8 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost)
+ : (14 * NumTraits<Scalar>::AddCost +
+ 6 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
+#else
+ Cost =
+ (sizeof(Scalar) == 4
+ ? (21 * NumTraits<Scalar>::AddCost + 13 * NumTraits<Scalar>::MulCost)
+ : (23 * NumTraits<Scalar>::AddCost +
+ 12 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
+#endif
+ };
+};
+template<typename Scalar> struct scalar_log_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_log_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasLog,
+ Cost =
+ (PacketAccess
+#ifdef EIGEN_VECTORIZE_FMA
+ ? (20 * NumTraits<Scalar>::AddCost + 7 * NumTraits<Scalar>::MulCost)
+#else
+ ? (36 * NumTraits<Scalar>::AddCost + 14 * NumTraits<Scalar>::MulCost)
+#endif
+ : sizeof(Scalar)==4 ? 40 : 85)
+ };
+};
+template<typename Scalar> struct scalar_log1p_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_log1p_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log1p(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog1p(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_log1p_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasLog1p,
+ Cost = functor_traits<scalar_log_op<Scalar> >::Cost
+ };
+};
+template<typename Scalar> struct scalar_log10_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_log10_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog10 }; };
+template<typename Scalar> struct scalar_sqrt_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_sqrt_op<Scalar> > {
+ enum {
+#if EIGEN_FAST_MATH
+ Cost = (sizeof(Scalar) == 8 ? 28
+ : (3 * NumTraits<Scalar>::AddCost +
+ 5 * NumTraits<Scalar>::MulCost)),
+#else
+ Cost = (sizeof(Scalar) == 8 ? 28 : 14),
+#endif
+ PacketAccess = packet_traits<Scalar>::HasSqrt
+ };
+};
+template<typename Scalar> struct scalar_rsqrt_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(1)/numext::sqrt(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_rsqrt_op<Scalar> >
+{ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasRsqrt
+ };
+};
+template<typename Scalar> struct scalar_cos_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::cos(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cos_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasCos
+ };
+};
+template<typename Scalar> struct scalar_sin_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sin(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sin_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasSin
+ };
+};
+template<typename Scalar> struct scalar_tan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tan(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_tan_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasTan
+ };
+};
+template<typename Scalar> struct scalar_acos_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::acos(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_acos_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasACos
+ };
+};
+template<typename Scalar> struct scalar_asin_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::asin(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_asin_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasASin
+ };
+};
+template<typename Scalar> struct scalar_atan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::atan(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_atan_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasATan
+ };
+};
+template <typename Scalar>
+struct scalar_tanh_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_tanh_op<Scalar> > {
+ enum {
+ PacketAccess = packet_traits<Scalar>::HasTanh,
+ Cost = ( (EIGEN_FAST_MATH && is_same<Scalar,float>::value)
+#ifdef EIGEN_VECTORIZE_FMA
+ ? (2 * NumTraits<Scalar>::AddCost +
+ 6 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
+#else
+ ? (11 * NumTraits<Scalar>::AddCost +
+ 11 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
+#endif
+ : (6 * NumTraits<Scalar>::AddCost +
+ 3 * NumTraits<Scalar>::MulCost +
+ 2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
+ functor_traits<scalar_exp_op<Scalar> >::Cost))
+ };
+};
+template<typename Scalar> struct scalar_sinh_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sinh(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sinh_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasSinh
+ };
+};
+template<typename Scalar> struct scalar_cosh_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::cosh(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cosh_op<Scalar> >
+{
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasCosh
+ };
+};
+template<typename Scalar>
+struct scalar_inverse_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_inverse_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+template<typename Scalar>
+struct scalar_square_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_square_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+template<typename Scalar>
+struct scalar_cube_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+ { return internal::pmul(a,pmul(a,a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cube_op<Scalar> >
+{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+template<typename Scalar> struct scalar_round_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_round_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasRound
+ };
+};
+template<typename Scalar> struct scalar_floor_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_floor_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasFloor
+ };
+};
+template<typename Scalar> struct scalar_ceil_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_ceil_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasCeil
+ };
+};
+template<typename Scalar> struct scalar_isnan_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)
+ typedef bool result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isnan_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = false
+ };
+};
+template<typename Scalar> struct scalar_isinf_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)
+ typedef bool result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isinf_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = false
+ };
+};
+template<typename Scalar> struct scalar_isfinite_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)
+ typedef bool result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isfinite_op<Scalar> >
+{
+ enum {
+ Cost = NumTraits<Scalar>::MulCost,
+ PacketAccess = false
+ };
+};
+template<typename Scalar> struct scalar_boolean_not_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_not_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a) const { return !a; }
+};
+template<typename Scalar>
+struct functor_traits<scalar_boolean_not_op<Scalar> > {
+ enum {
+ Cost = NumTraits<bool>::AddCost,
+ PacketAccess = false
+ };
+};
+template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op;
+template<typename Scalar>
+struct scalar_sign_op<Scalar,false> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
+ {
+ return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
+ }
+};
+template<typename Scalar>
+struct scalar_sign_op<Scalar,true> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
+ {
+ typedef typename NumTraits<Scalar>::Real real_type;
+ real_type aa = numext::abs(a);
+ if (aa==real_type(0))
+ return Scalar(0);
+ aa = real_type(1)/aa;
+ return Scalar(real(a)*aa, imag(a)*aa );
+ }
+};
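+// For complex scalars, sign(a) = a/|a| (a point on the unit circle) with
+// sign(0) defined as 0; the reciprocal 1/|a| is computed once and applied to
+// the real and imaginary parts separately, avoiding a complex division.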
+template<typename Scalar>
+struct functor_traits<scalar_sign_op<Scalar> >
+{ enum {
+ Cost =
+ NumTraits<Scalar>::IsComplex
+ ? ( 8*NumTraits<Scalar>::MulCost )
+ : ( 3*NumTraits<Scalar>::AddCost),
+ PacketAccess = packet_traits<Scalar>::HasSign
+ };
+};
+
+template <typename T>
+struct scalar_logistic_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
+ const T one = T(1);
+ return one / (one + numext::exp(-x));
+ }
+
+ template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Packet packetOp(const Packet& x) const {
+ const Packet one = pset1<Packet>(T(1));
+ return pdiv(one, padd(one, pexp(pnegate(x))));
+ }
+};
+template <typename T>
+struct functor_traits<scalar_logistic_op<T> > {
+ enum {
+ Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
+ PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
+ packet_traits<T>::HasNegate && packet_traits<T>::HasExp
+ };
+};
+
+template <>
+struct scalar_logistic_op<float> {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator()(const float& x) const {
+ const float one = 1.0f;
+ return one / (one + numext::exp(-x));
+ }
+
+ template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Packet packetOp(const Packet& _x) const {
+ // Clamp the inputs to the range [-18, 18] since anything outside
+ // this range is 0.0f or 1.0f in single-precision.
+ const Packet x = pmax(pmin(_x, pset1<Packet>(18.0)), pset1<Packet>(-18.0));
+
+ // The monomial coefficients of the numerator polynomial (odd).
+ const Packet alpha_1 = pset1<Packet>(2.48287947061529e-01);
+ const Packet alpha_3 = pset1<Packet>(8.51377133304701e-03);
+ const Packet alpha_5 = pset1<Packet>(6.08574864600143e-05);
+ const Packet alpha_7 = pset1<Packet>(1.15627324459942e-07);
+ const Packet alpha_9 = pset1<Packet>(4.37031012579801e-11);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ const Packet beta_0 = pset1<Packet>(9.93151921023180e-01);
+ const Packet beta_2 = pset1<Packet>(1.16817656904453e-01);
+ const Packet beta_4 = pset1<Packet>(1.70198817374094e-03);
+ const Packet beta_6 = pset1<Packet>(6.29106785017040e-06);
+ const Packet beta_8 = pset1<Packet>(5.76102136993427e-09);
+ const Packet beta_10 = pset1<Packet>(6.10247389755681e-13);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const Packet x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p.
+ Packet p = pmadd(x2, alpha_9, alpha_7);
+ p = pmadd(x2, p, alpha_5);
+ p = pmadd(x2, p, alpha_3);
+ p = pmadd(x2, p, alpha_1);
+ p = pmul(x, p);
+
+    // Evaluate the denominator polynomial q.
+ Packet q = pmadd(x2, beta_10, beta_8);
+ q = pmadd(x2, q, beta_6);
+ q = pmadd(x2, q, beta_4);
+ q = pmadd(x2, q, beta_2);
+ q = pmadd(x2, q, beta_0);
+
+ // Divide the numerator by the denominator and shift it up.
+ return pmax(pmin(padd(pdiv(p, q), pset1<Packet>(0.5)), pset1<Packet>(1.0)),
+ pset1<Packet>(0.0));
+ }
+};
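+// The float specialization above evaluates sigmoid(x) - 0.5, an odd function,
+// as the ratio p(x)/q(x) of an odd numerator and an even denominator (both
+// evaluated in powers of x^2 with fused multiply-adds), then shifts the
+// result up by 0.5 and clamps it to [0, 1]. The initial clamp of the input to
+// [-18, 18] is safe because outside that range the single-precision result is
+// already saturated at 0.0f or 1.0f.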
+}
+}
+#endif
+// end #include "src/Core/functors/UnaryFunctors.h"
+// #include "src/Core/functors/NullaryFunctors.h"
+#ifndef EIGEN_NULLARY_FUNCTORS_H
+#define EIGEN_NULLARY_FUNCTORS_H
+namespace Eigen {
+namespace internal {
+template<typename Scalar>
+struct scalar_constant_op {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; }
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1<PacketType>(m_other); }
+ const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_constant_op<Scalar> >
+{ enum { Cost = 0 ,
+ PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
+template<typename Scalar> struct scalar_identity_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_identity_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
+template <typename Scalar, typename Packet, bool IsInteger> struct linspaced_op_impl;
+template <typename Scalar, typename Packet>
+struct linspaced_op_impl<Scalar,Packet,false>
+{
+ linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+ m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
+ m_flip(numext::abs(high)<numext::abs(low))
+ {}
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ if(m_flip)
+ return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
+ else
+ return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
+ }
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
+ {
+ if(m_flip)
+ {
+ Packet pi = plset<Packet>(Scalar(i-m_size1));
+ Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
+ if(i==0)
+ res = pinsertfirst(res, m_low);
+ return res;
+ }
+ else
+ {
+ Packet pi = plset<Packet>(Scalar(i));
+ Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
+ if(i==m_size1-unpacket_traits<Packet>::size+1)
+ res = pinsertlast(res, m_high);
+ return res;
+ }
+ }
+ const Scalar m_low;
+ const Scalar m_high;
+ const Index m_size1;
+ const Scalar m_step;
+ const bool m_flip;
+};
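+// Notes on the floating-point path above: m_flip anchors the recurrence at
+// whichever endpoint has the smaller magnitude, which reduces rounding error,
+// and the i==0 / i==m_size1 special cases (pinsertfirst / pinsertlast in the
+// packet path) guarantee that the first and last coefficients equal low and
+// high exactly.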
+template <typename Scalar, typename Packet>
+struct linspaced_op_impl<Scalar,Packet,true>
+{
+ linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+ m_low(low),
+ m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
+ m_divisor(convert_index<Scalar>((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),
+ m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)<num_steps)
+ {}
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar operator() (IndexType i) const
+ {
+ if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;
+ else return m_low + convert_index<Scalar>(i)*m_multiplier;
+ }
+ const Scalar m_low;
+ const Scalar m_multiplier;
+ const Scalar m_divisor;
+ const bool m_use_divisor;
+};
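+// Integer path: when the range is wide enough, coefficients advance by the
+// exact integer step (high-low)/(num_steps-1); when there are more steps than
+// representable values in [low, high] (m_use_divisor), the index is divided
+// down instead, so that values repeat but still spread over the whole range.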
+template <typename Scalar, typename PacketType> struct linspaced_op;
+template <typename Scalar, typename PacketType> struct functor_traits< linspaced_op<Scalar,PacketType> >
+{
+ enum
+ {
+ Cost = 1,
+ PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,
+ IsRepeatable = true
+ };
+};
+template <typename Scalar, typename PacketType> struct linspaced_op
+{
+ linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
+ : impl((num_steps==1 ? high : low),high,num_steps)
+ {}
+ template<typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); }
+ template<typename Packet,typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
+ const linspaced_op_impl<Scalar,PacketType,NumTraits<Scalar>::IsInteger> impl;
+};
+template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
+#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_nullary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
+#endif
+}
+}
+#endif
+// end #include "src/Core/functors/NullaryFunctors.h"
+// #include "src/Core/functors/AssignmentFunctors.h"
+#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
+#define EIGEN_ASSIGNMENT_FUNCTORS_H
+namespace Eigen {
+namespace internal {
+template<typename DstScalar,typename SrcScalar> struct assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
+};
+template<typename DstScalar> struct assign_op<DstScalar,void> {};
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable
+ };
+};
+template<typename DstScalar,typename SrcScalar> struct add_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<add_assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd
+ };
+};
+template<typename DstScalar,typename SrcScalar> struct sub_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub
+ };
+};
+template<typename DstScalar, typename SrcScalar=DstScalar>
+struct mul_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar, typename SrcScalar>
+struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
+ };
+};
+template<typename DstScalar, typename SrcScalar=DstScalar> struct div_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; }
+ template<int Alignment, typename Packet>
+ EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+ { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar, typename SrcScalar>
+struct functor_traits<div_assign_op<DstScalar,SrcScalar> > {
+ enum {
+ Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
+ PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv
+ };
+};
+template<typename Scalar> struct swap_assign_op {
+ EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
+ {
+#ifdef __CUDACC__
+ Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
+#else
+ using std::swap;
+ swap(a,const_cast<Scalar&>(b));
+#endif
+ }
+};
+template<typename Scalar>
+struct functor_traits<swap_assign_op<Scalar> > {
+ enum {
+ Cost = 3 * NumTraits<Scalar>::ReadCost,
+ PacketAccess = packet_traits<Scalar>::Vectorizable
+ };
+};
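+// Assignment functors pair a scalar assignCoeff with an assignPacket that is
+// templated on the store Alignment: the op-assign variants load the current
+// destination packet with ploadt, combine it with the source packet, and
+// store the result back with pstoret. swap_assign_op falls back to a manual
+// three-move swap under __CUDACC__ because std::swap is not callable from
+// device code.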
+}
+}
+#endif
+// end #include "src/Core/functors/AssignmentFunctors.h"
+// #include "src/Core/DenseCoeffsBase.h"
+#ifndef EIGEN_DENSECOEFFSBASE_H
+#define EIGEN_DENSECOEFFSBASE_H
+namespace Eigen {
+namespace internal {
+template<typename T> struct add_const_on_value_type_if_arithmetic
+{
+ typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
+};
+}
+template<typename Derived>
+class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
+{
+ public:
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
+ const Scalar&,
+ typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
+ >::type CoeffReturnType;
+ typedef typename internal::add_const_on_value_type_if_arithmetic<
+ typename internal::packet_traits<Scalar>::type
+ >::type PacketReturnType;
+ typedef EigenBase<Derived> Base;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
+ {
+ return int(Derived::RowsAtCompileTime) == 1 ? 0
+ : int(Derived::ColsAtCompileTime) == 1 ? inner
+ : int(Derived::Flags)&RowMajorBit ? outer
+ : inner;
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
+ {
+ return int(Derived::ColsAtCompileTime) == 1 ? 0
+ : int(Derived::RowsAtCompileTime) == 1 ? inner
+ : int(Derived::Flags)&RowMajorBit ? inner
+ : outer;
+ }
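+    // The two helpers above map (outer, inner) storage coordinates back to
+    // (row, col): for a row-major expression the outer index walks rows and
+    // the inner index walks columns, and vice versa for column-major;
+    // compile-time vectors collapse the appropriate index to 0.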
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return internal::evaluator<Derived>(derived()).coeff(row,col);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+ {
+ return coeff(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
+ {
+ eigen_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return coeff(row, col);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ coeff(Index index) const
+ {
+ EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+ eigen_internal_assert(index >= 0 && index < size());
+ return internal::evaluator<Derived>(derived()).coeff(index);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ operator[](Index index) const
+ {
+ EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+ eigen_assert(index >= 0 && index < size());
+ return coeff(index);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ operator()(Index index) const
+ {
+ eigen_assert(index >= 0 && index < size());
+ return coeff(index);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ x() const { return (*this)[0]; }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ y() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+ return (*this)[1];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ z() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+ return (*this)[2];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CoeffReturnType
+ w() const
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+ return (*this)[3];
+ }
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
+ {
+ typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
+ eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+ return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
+ }
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const
+ {
+ return packet<LoadMode>(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
+ {
+ EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+ typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
+ eigen_internal_assert(index >= 0 && index < size());
+ return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
+ }
+ protected:
+ void coeffRef();
+ void coeffRefByOuterInner();
+ void writePacket();
+ void writePacketByOuterInner();
+ void copyCoeff();
+ void copyCoeffByOuterInner();
+ void copyPacket();
+ void copyPacketByOuterInner();
+ void stride();
+ void innerStride();
+ void outerStride();
+ void rowStride();
+ void colStride();
+};
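+// The protected member functions declared (but intentionally never defined)
+// at the end of the read-only base hide the mutating API, so that calling
+// e.g. coeffRef() or writePacket() on a read-only expression is a
+// compile-time error rather than something that silently compiles.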
+template<typename Derived>
+class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
+{
+ public:
+ typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ using Base::coeff;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ using Base::rowIndexByOuterInner;
+ using Base::colIndexByOuterInner;
+ using Base::operator[];
+ using Base::operator();
+ using Base::x;
+ using Base::y;
+ using Base::z;
+ using Base::w;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
+ {
+ eigen_internal_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return internal::evaluator<Derived>(derived()).coeffRef(row,col);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ coeffRefByOuterInner(Index outer, Index inner)
+ {
+ return coeffRef(rowIndexByOuterInner(outer, inner),
+ colIndexByOuterInner(outer, inner));
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator()(Index row, Index col)
+ {
+ eigen_assert(row >= 0 && row < rows()
+ && col >= 0 && col < cols());
+ return coeffRef(row, col);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+ eigen_internal_assert(index >= 0 && index < size());
+ return internal::evaluator<Derived>(derived()).coeffRef(index);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator[](Index index)
+ {
+ EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+ eigen_assert(index >= 0 && index < size());
+ return coeffRef(index);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ operator()(Index index)
+ {
+ eigen_assert(index >= 0 && index < size());
+ return coeffRef(index);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ x() { return (*this)[0]; }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ y()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+ return (*this)[1];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ z()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+ return (*this)[2];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar&
+ w()
+ {
+ EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+ return (*this)[3];
+ }
+};
+template<typename Derived>
+class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
+{
+ public:
+ typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return derived().innerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return derived().outerStride();
+ }
+ inline Index stride() const
+ {
+ return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index rowStride() const
+ {
+ return Derived::IsRowMajor ? outerStride() : innerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index colStride() const
+ {
+ return Derived::IsRowMajor ? innerStride() : outerStride();
+ }
+};
+template<typename Derived>
+class DenseCoeffsBase<Derived, DirectWriteAccessors>
+ : public DenseCoeffsBase<Derived, WriteAccessors>
+{
+ public:
+ typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::derived;
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return derived().innerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return derived().outerStride();
+ }
+ inline Index stride() const
+ {
+ return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index rowStride() const
+ {
+ return Derived::IsRowMajor ? outerStride() : innerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index colStride() const
+ {
+ return Derived::IsRowMajor ? innerStride() : outerStride();
+ }
+};
+namespace internal {
+template<int Alignment, typename Derived, bool JustReturnZero>
+struct first_aligned_impl
+{
+ static inline Index run(const Derived&)
+ { return 0; }
+};
+template<int Alignment, typename Derived>
+struct first_aligned_impl<Alignment, Derived, false>
+{
+ static inline Index run(const Derived& m)
+ {
+ return internal::first_aligned<Alignment>(m.data(), m.size());
+ }
+};
+template<int Alignment, typename Derived>
+static inline Index first_aligned(const DenseBase<Derived>& m)
+{
+ enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
+ return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
+}
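+// first_aligned returns the index of the first coefficient whose address
+// satisfies the requested Alignment, letting evaluators peel a scalar
+// prologue before the vectorized loop; it short-circuits to 0 when the
+// expression is already sufficiently aligned or has no direct data access.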
+template<typename Derived>
+static inline Index first_default_aligned(const DenseBase<Derived>& m)
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type DefaultPacketType;
+ return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);
+}
+template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
+struct inner_stride_at_compile_time
+{
+ enum { ret = traits<Derived>::InnerStrideAtCompileTime };
+};
+template<typename Derived>
+struct inner_stride_at_compile_time<Derived, false>
+{
+ enum { ret = 0 };
+};
+template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
+struct outer_stride_at_compile_time
+{
+ enum { ret = traits<Derived>::OuterStrideAtCompileTime };
+};
+template<typename Derived>
+struct outer_stride_at_compile_time<Derived, false>
+{
+ enum { ret = 0 };
+};
+}
+}
+#endif
+// end #include "src/Core/DenseCoeffsBase.h"
+// #include "src/Core/DenseBase.h"
+#ifndef EIGEN_DENSEBASE_H
+#define EIGEN_DENSEBASE_H
+namespace Eigen {
+namespace internal {
+static inline void check_DenseIndex_is_signed() {
+ EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE);
+}
+}
+template<typename Derived> class DenseBase
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ : public DenseCoeffsBase<Derived>
+#else
+ : public DenseCoeffsBase<Derived,DirectWriteAccessors>
+#endif
+{
+ public:
+ typedef Eigen::InnerIterator<Derived> InnerIterator;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef Scalar value_type;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef DenseCoeffsBase<Derived> Base;
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::rowIndexByOuterInner;
+ using Base::colIndexByOuterInner;
+ using Base::coeff;
+ using Base::coeffByOuterInner;
+ using Base::operator();
+ using Base::operator[];
+ using Base::x;
+ using Base::y;
+ using Base::z;
+ using Base::w;
+ using Base::stride;
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+ enum {
+ RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+ SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime>::ret),
+ MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+ MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+ IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
+ || internal::traits<Derived>::MaxColsAtCompileTime == 1,
+ Flags = internal::traits<Derived>::Flags,
+ IsRowMajor = int(Flags) & RowMajorBit,
+ InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
+ : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+ InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
+ OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
+ };
+ typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;
+ enum { IsPlainObjectBase = 0 };
+ typedef Matrix<typename internal::traits<Derived>::Scalar,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime,
+ AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+ internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime
+ > PlainMatrix;
+ typedef Array<typename internal::traits<Derived>::Scalar,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime,
+ AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+ internal::traits<Derived>::MaxRowsAtCompileTime,
+ internal::traits<Derived>::MaxColsAtCompileTime
+ > PlainArray;
+ typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
+ PlainMatrix, PlainArray>::type PlainObject;
+ EIGEN_DEVICE_FUNC
+ inline Index nonZeros() const { return size(); }
+ EIGEN_DEVICE_FUNC
+ Index outerSize() const
+ {
+ return IsVectorAtCompileTime ? 1
+ : int(IsRowMajor) ? this->rows() : this->cols();
+ }
+ EIGEN_DEVICE_FUNC
+ Index innerSize() const
+ {
+ return IsVectorAtCompileTime ? this->size()
+ : int(IsRowMajor) ? this->cols() : this->rows();
+ }
+ EIGEN_DEVICE_FUNC
+ void resize(Index newSize)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(newSize);
+ eigen_assert(newSize == this->size()
+ && "DenseBase::resize() does not actually allow one to resize.");
+ }
+ EIGEN_DEVICE_FUNC
+ void resize(Index rows, Index cols)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(rows);
+ EIGEN_ONLY_USED_FOR_DEBUG(cols);
+ eigen_assert(rows == this->rows() && cols == this->cols()
+ && "DenseBase::resize() does not actually allow one to resize.");
+ }
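+ // Editor's note (illustrative): these overloads only assert that the
+ // requested geometry matches the current one. They exist so generic code
+ // written against DenseBase can call resize() uniformly; a real
+ // reallocation is only possible on plain objects (Matrix/Array), e.g.:
+ //   Eigen::MatrixXd a(2, 3);
+ //   a.resize(3, 2);                    // Matrix really resizes
+ //   a.block(0, 0, 2, 2).resize(2, 2);  // expression: no-op, sizes match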
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
+ typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
+#endif
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const DenseBase<OtherDerived>& other);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const DenseBase& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const EigenBase<OtherDerived> &other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator+=(const EigenBase<OtherDerived> &other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator-=(const EigenBase<OtherDerived> &other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const ReturnByValue<OtherDerived>& func);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& lazyAssign(const DenseBase<OtherDerived>& other);
+ EIGEN_DEVICE_FUNC
+ CommaInitializer<Derived> operator<< (const Scalar& s);
+ template<unsigned int Added,unsigned int Removed>
+ EIGEN_DEPRECATED
+ const Derived& flagged() const
+ { return derived(); }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
+ typedef Transpose<Derived> TransposeReturnType;
+ EIGEN_DEVICE_FUNC
+ TransposeReturnType transpose();
+ typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+ EIGEN_DEVICE_FUNC
+ ConstTransposeReturnType transpose() const;
+ EIGEN_DEVICE_FUNC
+ void transposeInPlace();
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(Index rows, Index cols, const Scalar& value);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(Index size, const Scalar& value);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType
+ Constant(const Scalar& value);
+ EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+ LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+ LinSpaced(Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+ LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+ LinSpaced(const Scalar& low, const Scalar& high);
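+ // Editor's note (illustrative): LinSpaced(size, low, high) yields 'size'
+ // evenly spaced coefficients from low to high inclusive, e.g.:
+ //   Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);
+ //   // v == [0, 0.25, 0.5, 0.75, 1]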
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+ NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+ NullaryExpr(Index size, const CustomNullaryOp& func);
+ template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+ static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+ NullaryExpr(const CustomNullaryOp& func);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
+ EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
+ EIGEN_DEVICE_FUNC void fill(const Scalar& value);
+ EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
+ EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+ EIGEN_DEVICE_FUNC Derived& setZero();
+ EIGEN_DEVICE_FUNC Derived& setOnes();
+ EIGEN_DEVICE_FUNC Derived& setRandom();
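+ // Editor's note (illustrative): the static Constant/Zero/Ones factories
+ // above return lazy nullary expressions, while the set*() members fill an
+ // existing object in place and return *this for chaining:
+ //   Eigen::MatrixXd m = Eigen::MatrixXd::Constant(2, 3, 7.0);
+ //   m.setZero();   // in-place; no temporary involved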
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC
+ bool isApprox(const DenseBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC
+ bool isMuchSmallerThan(const RealScalar& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC
+ bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ inline bool hasNaN() const;
+ inline bool allFinite() const;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator*=(const Scalar& other);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator/=(const Scalar& other);
+ typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE EvalReturnType eval() const
+ {
+ return typename internal::eval<Derived>::type(derived());
+ }
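+ // Editor's note (illustrative): eval() forces a lazy expression into its
+ // plain type (Matrix or Array); the classic use is breaking aliasing:
+ //   Eigen::MatrixXd m(3, 4);
+ //   m = m.transpose().eval();  // plain m = m.transpose() would read from
+ //                              // coefficients it has already overwritten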
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+ eigen_assert(rows()==other.rows() && cols()==other.cols());
+ call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(PlainObjectBase<OtherDerived>& other)
+ {
+ eigen_assert(rows()==other.rows() && cols()==other.cols());
+ call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
+ }
+ EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
+ EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+ EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
+ template<bool Enable> EIGEN_DEVICE_FUNC
+ inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+ template<bool Enable> EIGEN_DEVICE_FUNC
+ inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+ EIGEN_DEVICE_FUNC Scalar sum() const;
+ EIGEN_DEVICE_FUNC Scalar mean() const;
+ EIGEN_DEVICE_FUNC Scalar trace() const;
+ EIGEN_DEVICE_FUNC Scalar prod() const;
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
+ template<typename IndexType> EIGEN_DEVICE_FUNC
+ typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
+ template<typename BinaryOp>
+ EIGEN_DEVICE_FUNC
+ Scalar redux(const BinaryOp& func) const;
+ template<typename Visitor>
+ EIGEN_DEVICE_FUNC
+ void visit(Visitor& func) const;
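+ // Editor's note (illustrative): redux() folds all coefficients with an
+ // arbitrary binary functor (sum() and prod() are special cases), and the
+ // pointer-taking minCoeff/maxCoeff overloads also report the location:
+ //   Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 3);
+ //   Eigen::Index i, j;
+ //   double mx = m.maxCoeff(&i, &j);  // extremum and its (row, col)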
+ inline const WithFormat<Derived> format(const IOFormat& fmt) const
+ {
+ return WithFormat<Derived>(derived(), fmt);
+ }
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType value() const
+ {
+ EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+ eigen_assert(this->rows() == 1 && this->cols() == 1);
+ return derived().coeff(0,0);
+ }
+ EIGEN_DEVICE_FUNC bool all() const;
+ EIGEN_DEVICE_FUNC bool any() const;
+ EIGEN_DEVICE_FUNC Index count() const;
+ typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
+ typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
+ typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
+ typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
+ EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
+ return ConstRowwiseReturnType(derived());
+ }
+ EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
+ EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
+ return ConstColwiseReturnType(derived());
+ }
+ EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
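+ // Editor's note (illustrative): colwise()/rowwise() turn reductions into
+ // per-column / per-row operations:
+ //   Eigen::MatrixXd m(2, 3);
+ //   m << 1, 2, 3,
+ //        4, 5, 6;
+ //   Eigen::RowVectorXd s = m.colwise().sum();  // [5, 7, 9]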
+ typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
+ static const RandomReturnType Random(Index rows, Index cols);
+ static const RandomReturnType Random(Index size);
+ static const RandomReturnType Random();
+ template<typename ThenDerived,typename ElseDerived>
+ const Select<Derived,ThenDerived,ElseDerived>
+ select(const DenseBase<ThenDerived>& thenMatrix,
+ const DenseBase<ElseDerived>& elseMatrix) const;
+ template<typename ThenDerived>
+ inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
+ select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
+ template<typename ElseDerived>
+ inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
+ select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
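+ // Editor's note (illustrative): select() is the coefficient-wise ternary
+ // operator; the condition is typically a boolean array expression:
+ //   Eigen::MatrixXi m(3, 3);
+ //   m << 1, 2, 3, 4, 5, 6, 7, 8, 9;
+ //   m = (m.array() >= 5).select(-m, m);  // negate coefficients >= 5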
+ template<int p> RealScalar lpNorm() const;
+ template<int RowFactor, int ColFactor>
+ EIGEN_DEVICE_FUNC
+ const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+ EIGEN_DEVICE_FUNC
+ const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
+ {
+ return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
+ }
+ typedef Reverse<Derived, BothDirections> ReverseReturnType;
+ typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
+ EIGEN_DEVICE_FUNC ReverseReturnType reverse();
+ EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
+ {
+ return ConstReverseReturnType(derived());
+ }
+ EIGEN_DEVICE_FUNC void reverseInPlace();
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
+#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
+// # include "../plugins/BlockMethods.h"
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr;
+typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ConstColXpr;
+typedef Block<Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr;
+typedef const Block<const Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowXpr;
+typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr;
+typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ConstColsBlockXpr;
+typedef Block<Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr;
+typedef const Block<const Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowsBlockXpr;
+template<int N> struct NColsBlockXpr { typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
+template<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
+template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
+template<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
+typedef Block<Derived> BlockXpr;
+typedef const Block<const Derived> ConstBlockXpr;
+template<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; };
+template<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; };
+typedef VectorBlock<Derived> SegmentReturnType;
+typedef const VectorBlock<const Derived> ConstSegmentReturnType;
+template<int Size> struct FixedSegmentReturnType { typedef VectorBlock<Derived, Size> Type; };
+template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBlock<const Derived, Size> Type; };
+#endif
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+EIGEN_DEVICE_FUNC
+inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols)
+{
+ return BlockXpr(derived(), startRow, startCol, blockRows, blockCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
+{
+ return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+EIGEN_DEVICE_FUNC
+inline BlockXpr topRightCorner(Index cRows, Index cCols)
+{
+ return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const
+{
+ return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner()
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
+}
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols)
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
+}
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+EIGEN_DEVICE_FUNC
+inline BlockXpr topLeftCorner(Index cRows, Index cCols)
+{
+ return BlockXpr(derived(), 0, 0, cRows, cCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const
+{
+ return ConstBlockXpr(derived(), 0, 0, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner()
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
+}
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols)
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
+}
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+EIGEN_DEVICE_FUNC
+inline BlockXpr bottomRightCorner(Index cRows, Index cCols)
+{
+ return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const
+{
+ return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner()
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
+}
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols)
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+EIGEN_DEVICE_FUNC
+inline BlockXpr bottomLeftCorner(Index cRows, Index cCols)
+{
+ return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
+}
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const
+{
+ return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner()
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
+}
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols)
+{
+ return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
+}
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const
+{
+ return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr topRows(Index n)
+{
+ return RowsBlockXpr(derived(), 0, 0, n, cols());
+}
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr topRows(Index n) const
+{
+ return ConstRowsBlockXpr(derived(), 0, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
+{
+ return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
+{
+ return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr bottomRows(Index n)
+{
+ return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
+}
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr bottomRows(Index n) const
+{
+ return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
+{
+ return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
+{
+ return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr middleRows(Index startRow, Index n)
+{
+ return RowsBlockXpr(derived(), startRow, 0, n, cols());
+}
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const
+{
+ return ConstRowsBlockXpr(derived(), startRow, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
+{
+ return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const
+{
+ return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr leftCols(Index n)
+{
+ return ColsBlockXpr(derived(), 0, 0, rows(), n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr leftCols(Index n) const
+{
+ return ConstColsBlockXpr(derived(), 0, 0, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
+{
+ return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
+{
+ return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr rightCols(Index n)
+{
+ return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr rightCols(Index n) const
+{
+ return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
+{
+ return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
+{
+ return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr middleCols(Index startCol, Index numCols)
+{
+ return ColsBlockXpr(derived(), 0, startCol, rows(), numCols);
+}
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
+{
+ return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
+{
+ return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const
+{
+ return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int NRows, int NCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol)
+{
+ return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
+}
+template<int NRows, int NCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const
+{
+ return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
+}
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+template<int NRows, int NCols>
+inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+{
+ return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
+}
+template<int NRows, int NCols>
+inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
+ Index blockRows, Index blockCols) const
+{
+ return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+EIGEN_DEVICE_FUNC
+inline ColXpr col(Index i)
+{
+ return ColXpr(derived(), i);
+}
+EIGEN_DEVICE_FUNC
+inline ConstColXpr col(Index i) const
+{
+ return ConstColXpr(derived(), i);
+}
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+EIGEN_DEVICE_FUNC
+inline RowXpr row(Index i)
+{
+ return RowXpr(derived(), i);
+}
+EIGEN_DEVICE_FUNC
+inline ConstRowXpr row(Index i) const
+{
+ return ConstRowXpr(derived(), i);
+}
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType segment(Index start, Index n)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return SegmentReturnType(derived(), start, n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType segment(Index start, Index n) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return ConstSegmentReturnType(derived(), start, n);
+}
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType head(Index n)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return SegmentReturnType(derived(), 0, n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType head(Index n) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return ConstSegmentReturnType(derived(), 0, n);
+}
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType tail(Index n)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return SegmentReturnType(derived(), this->size() - n, n);
+}
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType tail(Index n) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return ConstSegmentReturnType(derived(), this->size() - n, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename FixedSegmentReturnType<N>::Type(derived(), start, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
+}
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n);
+}
+// end # include "../plugins/BlockMethods.h"
+# ifdef EIGEN_DENSEBASE_PLUGIN
+# include EIGEN_DENSEBASE_PLUGIN
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& ) const
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
+ }
+ protected:
+ EIGEN_DEVICE_FUNC DenseBase()
+ {
+#ifdef EIGEN_INTERNAL_DEBUGGING
+ EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
+ && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
+ INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
+#endif
+ }
+ private:
+ EIGEN_DEVICE_FUNC explicit DenseBase(int);
+ EIGEN_DEVICE_FUNC DenseBase(int,int);
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
+};
+}
+#endif
+// end #include "src/Core/DenseBase.h"
+// #include "src/Core/MatrixBase.h"
+#ifndef EIGEN_MATRIXBASE_H
+#define EIGEN_MATRIXBASE_H
+namespace Eigen {
+template<typename Derived> class MatrixBase
+ : public DenseBase<Derived>
+{
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef MatrixBase StorageBaseType;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef DenseBase<Derived> Base;
+ using Base::RowsAtCompileTime;
+ using Base::ColsAtCompileTime;
+ using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::eval;
+ using Base::operator+=;
+ using Base::operator-=;
+ using Base::operator*=;
+ using Base::operator/=;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+ typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
+ typedef typename Base::RowXpr RowXpr;
+ typedef typename Base::ColXpr ColXpr;
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),
+ EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
+#endif
+ EIGEN_DEVICE_FUNC
+ inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
+ typedef typename Base::PlainObject PlainObject;
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+ typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
+ ConstTransposeReturnType
+ >::type AdjointReturnType;
+ typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
+ typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
+ typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
+ internal::traits<Derived>::RowsAtCompileTime,
+ internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;
+#endif
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
+// # include "../plugins/CommonCwiseUnaryOps.h"
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
+ const Derived&
+ >::type ConjugateReturnType;
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
+ const Derived&
+ >::type RealReturnType;
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
+ Derived&
+ >::type NonConstRealReturnType;
+typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;
+typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
+typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
+#endif
+EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
+EIGEN_DEVICE_FUNC
+inline const NegativeReturnType
+operator-() const { return NegativeReturnType(derived()); }
+template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
+EIGEN_DOC_UNARY_ADDONS(cast,conversion function)
+template<typename NewType>
+EIGEN_DEVICE_FUNC
+typename CastXpr<NewType>::Type
+cast() const
+{
+ return typename CastXpr<NewType>::Type(derived());
+}
+EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
+EIGEN_DEVICE_FUNC
+inline ConjugateReturnType
+conjugate() const
+{
+ return ConjugateReturnType(derived());
+}
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+EIGEN_DEVICE_FUNC
+inline RealReturnType
+real() const { return RealReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+EIGEN_DEVICE_FUNC
+inline const ImagReturnType
+imag() const { return ImagReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)
+template<typename CustomUnaryOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
+unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
+{
+ return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
+}
+EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)
+template<typename CustomViewOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryView<CustomViewOp, const Derived>
+unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
+{
+ return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
+}
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+EIGEN_DEVICE_FUNC
+inline NonConstRealReturnType
+real() { return NonConstRealReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+EIGEN_DEVICE_FUNC
+inline NonConstImagReturnType
+imag() { return NonConstImagReturnType(derived()); }
+// end # include "../plugins/CommonCwiseUnaryOps.h"
+// # include "../plugins/CommonCwiseBinaryOps.h"
+EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)
+EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)
+template<typename CustomBinaryOp, typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
+binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
+{
+ return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);
+}
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
+#endif
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
+operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
+operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+// end # include "../plugins/CommonCwiseBinaryOps.h"
+// # include "../plugins/MatrixCwiseUnaryOps.h"
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbsReturnType
+cwiseAbs() const { return CwiseAbsReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
+cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)
+EIGEN_DEVICE_FUNC
+inline const CwiseSqrtReturnType
+cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)
+EIGEN_DEVICE_FUNC
+inline const CwiseSignReturnType
+cwiseSign() const { return CwiseSignReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)
+EIGEN_DEVICE_FUNC
+inline const CwiseInverseReturnType
+cwiseInverse() const { return CwiseInverseReturnType(derived()); }
+// end # include "../plugins/MatrixCwiseUnaryOps.h"
+// # include "../plugins/MatrixCwiseBinaryOps.h"
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
+cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMin(const Scalar &other) const
+{
+ return cwiseMin(Derived::Constant(rows(), cols(), other));
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMax(const Scalar &other) const
+{
+ return cwiseMax(Derived::Constant(rows(), cols(), other));
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
+cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
+EIGEN_DEVICE_FUNC
+inline const CwiseScalarEqualReturnType
+cwiseEqual(const Scalar& s) const
+{
+ return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());
+}
+// end # include "../plugins/MatrixCwiseBinaryOps.h"
+# ifdef EIGEN_MATRIXBASE_PLUGIN
+# include EIGEN_MATRIXBASE_PLUGIN
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const MatrixBase& other);
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const DenseBase<OtherDerived>& other);
+ template <typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const EigenBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const ReturnByValue<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator+=(const MatrixBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator-=(const MatrixBase<OtherDerived>& other);
+#ifdef __CUDACC__
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ const Product<Derived,OtherDerived,LazyProduct>
+ operator*(const MatrixBase<OtherDerived> &other) const
+ { return this->lazyProduct(other); }
+#else
+ template<typename OtherDerived>
+ const Product<Derived,OtherDerived>
+ operator*(const MatrixBase<OtherDerived> &other) const;
+#endif
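+ // Editor's note: under nvcc the general product above is routed through
+ // lazyProduct(), since the cache-blocked GEMM kernels used on the host are
+ // not usable from device code; both paths produce the same coefficients,
+ // only the evaluation strategy differs.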
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ const Product<Derived,OtherDerived,LazyProduct>
+ lazyProduct(const MatrixBase<OtherDerived> &other) const;
+ template<typename OtherDerived>
+ Derived& operator*=(const EigenBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ void applyOnTheLeft(const EigenBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ void applyOnTheRight(const EigenBase<OtherDerived>& other);
+ template<typename DiagonalDerived>
+ EIGEN_DEVICE_FUNC
+ const Product<Derived, DiagonalDerived, LazyProduct>
+ operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+ dot(const MatrixBase<OtherDerived>& other) const;
+ EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
+ EIGEN_DEVICE_FUNC RealScalar norm() const;
+ RealScalar stableNorm() const;
+ RealScalar blueNorm() const;
+ RealScalar hypotNorm() const;
+ EIGEN_DEVICE_FUNC const PlainObject normalized() const;
+ EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
+ EIGEN_DEVICE_FUNC void normalize();
+ EIGEN_DEVICE_FUNC void stableNormalize();
+ EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
+ EIGEN_DEVICE_FUNC void adjointInPlace();
+ typedef Diagonal<Derived> DiagonalReturnType;
+ EIGEN_DEVICE_FUNC
+ DiagonalReturnType diagonal();
+ typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
+ EIGEN_DEVICE_FUNC
+ ConstDiagonalReturnType diagonal() const;
+ template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
+ template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
+ template<int Index>
+ EIGEN_DEVICE_FUNC
+ typename DiagonalIndexReturnType<Index>::Type diagonal();
+ template<int Index>
+ EIGEN_DEVICE_FUNC
+ typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+ typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
+ typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
+ EIGEN_DEVICE_FUNC
+ DiagonalDynamicIndexReturnType diagonal(Index index);
+ EIGEN_DEVICE_FUNC
+ ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
+ template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
+ template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
+ template<unsigned int Mode>
+ EIGEN_DEVICE_FUNC
+ typename TriangularViewReturnType<Mode>::Type triangularView();
+ template<unsigned int Mode>
+ EIGEN_DEVICE_FUNC
+ typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
+ template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
+ template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
+ template<unsigned int UpLo>
+ EIGEN_DEVICE_FUNC
+ typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
+ template<unsigned int UpLo>
+ EIGEN_DEVICE_FUNC
+ typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
+ const SparseView<Derived> sparseView(const Scalar& reference = Scalar(0),
+ const typename NumTraits<Scalar>::Real& epsilon = NumTraits<Scalar>::dummy_precision()) const;
+ EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
+ EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
+ EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
+ EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
+ EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
+ EIGEN_DEVICE_FUNC
+ const DiagonalWrapper<const Derived> asDiagonal() const;
+ const PermutationWrapper<const Derived> asPermutation() const;
+ EIGEN_DEVICE_FUNC
+ Derived& setIdentity();
+ EIGEN_DEVICE_FUNC
+ Derived& setIdentity(Index rows, Index cols);
+ bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ template<typename OtherDerived>
+ bool isOrthogonal(const MatrixBase<OtherDerived>& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
+ { return cwiseEqual(other).all(); }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
+ { return cwiseNotEqual(other).any(); }
+ NoAlias<Derived,Eigen::MatrixBase > noalias();
+ inline const Derived& forceAlignedAccess() const { return derived(); }
+ inline Derived& forceAlignedAccess() { return derived(); }
+ template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
+ template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
+ EIGEN_DEVICE_FUNC Scalar trace() const;
+ template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
+ EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
+ EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
+ inline const FullPivLU<PlainObject> fullPivLu() const;
+ inline const PartialPivLU<PlainObject> partialPivLu() const;
+ inline const PartialPivLU<PlainObject> lu() const;
+ inline const Inverse<Derived> inverse() const;
+ template<typename ResultType>
+ inline void computeInverseAndDetWithCheck(
+ ResultType& inverse,
+ typename ResultType::Scalar& determinant,
+ bool& invertible,
+ const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
+ ) const;
+ template<typename ResultType>
+ inline void computeInverseWithCheck(
+ ResultType& inverse,
+ bool& invertible,
+ const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
+ ) const;
+ Scalar determinant() const;
+ inline const LLT<PlainObject> llt() const;
+ inline const LDLT<PlainObject> ldlt() const;
+ inline const HouseholderQR<PlainObject> householderQr() const;
+ inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
+ inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
+ inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
+ inline EigenvaluesReturnType eigenvalues() const;
+ inline RealScalar operatorNorm() const;
+ inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
+ inline BDCSVD<PlainObject> bdcSvd(unsigned int computationOptions = 0) const;
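+ // Editor's note (illustrative): these accessors return solver objects; the
+ // usual pattern is to decompose once and then solve, possibly many times:
+ //   Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
+ //   Eigen::VectorXd b = Eigen::VectorXd::Random(4);
+ //   Eigen::VectorXd x = A.partialPivLu().solve(b);  // LU, partial pivoting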
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename OtherDerived> struct cross_product_return_type {
+ typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
+ typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
+ };
+ #endif
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ inline typename cross_product_return_type<OtherDerived>::type
+#else
+ inline PlainObject
+#endif
+ cross(const MatrixBase<OtherDerived>& other) const;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+ EIGEN_DEVICE_FUNC
+ inline PlainObject unitOrthogonal() const;
+ EIGEN_DEVICE_FUNC
+ inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
+ enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
+ : ColsAtCompileTime==1 ? Vertical : Horizontal };
+ typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
+ EIGEN_DEVICE_FUNC
+ inline HomogeneousReturnType homogeneous() const;
+ enum {
+ SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
+ };
+ typedef Block<const Derived,
+ internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
+ internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
+ typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
+ EIGEN_DEVICE_FUNC
+ inline const HNormalizedReturnType hnormalized() const;
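+ // Editor's note (illustrative): homogeneous() appends a unit coordinate and
+ // hnormalized() divides by the last coordinate and drops it; this is the
+ // usual round trip for applying a projective transform:
+ //   Eigen::Vector3d p(1, 2, 3);
+ //   Eigen::Matrix4d T = Eigen::Matrix4d::Identity();
+ //   Eigen::Vector3d q = (T * p.homogeneous()).hnormalized();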
+ void makeHouseholderInPlace(Scalar& tau, RealScalar& beta);
+ template<typename EssentialPart>
+ void makeHouseholder(EssentialPart& essential,
+ Scalar& tau, RealScalar& beta) const;
+ template<typename EssentialPart>
+ void applyHouseholderOnTheLeft(const EssentialPart& essential,
+ const Scalar& tau,
+ Scalar* workspace);
+ template<typename EssentialPart>
+ void applyHouseholderOnTheRight(const EssentialPart& essential,
+ const Scalar& tau,
+ Scalar* workspace);
+ template<typename OtherScalar>
+ void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
+ template<typename OtherScalar>
+ void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
+ template<typename OtherDerived>
+ EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
+ cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
+ {
+ return other.cwiseProduct(derived());
+ }
+ typedef typename internal::stem_function<Scalar>::type StemFunction;
+ const MatrixExponentialReturnValue<Derived> exp() const;
+ const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
+ const MatrixFunctionReturnValue<Derived> cosh() const;
+ const MatrixFunctionReturnValue<Derived> sinh() const;
+ const MatrixFunctionReturnValue<Derived> cos() const;
+ const MatrixFunctionReturnValue<Derived> sin() const;
+ const MatrixSquareRootReturnValue<Derived> sqrt() const;
+ const MatrixLogarithmReturnValue<Derived> log() const;
+ const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
+ const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
+ protected:
+ EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
+ private:
+ EIGEN_DEVICE_FUNC explicit MatrixBase(int);
+ EIGEN_DEVICE_FUNC MatrixBase(int,int);
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
+ protected:
+ template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+ template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+};
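+// In-place products: the operator*= and applyOnTheLeft/Right definitions below
+// forward to the right-hand side's applyThisOnTheRight/applyThisOnTheLeft
+// hooks, so special types (e.g. permutations or Householder sequences) can
+// apply themselves without materializing a temporary product.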
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived&
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
+{
+ other.derived().applyThisOnTheRight(derived());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
+{
+ other.derived().applyThisOnTheRight(derived());
+}
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
+{
+ other.derived().applyThisOnTheLeft(derived());
+}
+}
+#endif
+// end #include "src/Core/MatrixBase.h"
+// #include "src/Core/EigenBase.h"
+#ifndef EIGEN_EIGENBASE_H
+#define EIGEN_EIGENBASE_H
+namespace Eigen {
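+// EigenBase is the common CRTP root of every Eigen expression (dense, sparse,
+// decompositions, permutations, ...). It only knows the derived type: derived()
+// performs the static downcast and rows()/cols()/size() forward to it, while
+// applyThisOnTheRight/Left provide the generic fallbacks (dst = dst * *this
+// and dst = *this * dst) that specialized types override.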
+template<typename Derived> struct EigenBase
+{
+ typedef Eigen::Index Index;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ EIGEN_DEVICE_FUNC
+ Derived& derived() { return *static_cast<Derived*>(this); }
+ EIGEN_DEVICE_FUNC
+ const Derived& derived() const { return *static_cast<const Derived*>(this); }
+ EIGEN_DEVICE_FUNC
+ inline Derived& const_cast_derived() const
+ { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
+ EIGEN_DEVICE_FUNC
+ inline const Derived& const_derived() const
+ { return *static_cast<const Derived*>(this); }
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return derived().rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return derived().cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index size() const { return rows() * cols(); }
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& dst) const
+ { derived().evalTo(dst); }
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void addTo(Dest& dst) const
+ {
+ typename Dest::PlainObject res(rows(),cols());
+ evalTo(res);
+ dst += res;
+ }
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void subTo(Dest& dst) const
+ {
+ typename Dest::PlainObject res(rows(),cols());
+ evalTo(res);
+ dst -= res;
+ }
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
+ {
+ dst = dst * this->derived();
+ }
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
+ {
+ dst = this->derived() * dst;
+ }
+};
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
+{
+ call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
+{
+ call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
+{
+ call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+}
+#endif
+// end #include "src/Core/EigenBase.h"
+// #include "src/Core/Product.h"
+#ifndef EIGEN_PRODUCT_H
+#define EIGEN_PRODUCT_H
+namespace Eigen {
+template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
+namespace internal {
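+// traits<Product>: the result's Scalar is promoted through ScalarBinaryOpTraits,
+// the StorageKind (dense, sparse, ...) through product_promote_storage_type,
+// and the preferred storage order is deduced from the operands' shapes and
+// their RowMajorBit/NoPreferredStorageOrderBit flags.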
+template<typename Lhs, typename Rhs, int Option>
+struct traits<Product<Lhs, Rhs, Option> >
+{
+ typedef typename remove_all<Lhs>::type LhsCleaned;
+ typedef typename remove_all<Rhs>::type RhsCleaned;
+ typedef traits<LhsCleaned> LhsTraits;
+ typedef traits<RhsCleaned> RhsTraits;
+ typedef MatrixXpr XprKind;
+ typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
+ typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
+ typename RhsTraits::StorageKind,
+ internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
+ typedef typename promote_index_type<typename LhsTraits::StorageIndex,
+ typename RhsTraits::StorageIndex>::type StorageIndex;
+ enum {
+ RowsAtCompileTime = LhsTraits::RowsAtCompileTime,
+ ColsAtCompileTime = RhsTraits::ColsAtCompileTime,
+ MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
+ InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
+ Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
+ : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
+ : ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
+ || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
+ : NoPreferredStorageOrderBit
+ };
+};
+}
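+// Product<Lhs,Rhs,Option> is a lazy expression: it stores (possibly by
+// reference) the two operands and asserts that the inner dimensions match.
+// For example, "MatrixXd c = a * b;" first builds a Product object and only
+// performs the actual computation when the assignment is evaluated.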
+template<typename _Lhs, typename _Rhs, int Option>
+class Product : public ProductImpl<_Lhs,_Rhs,Option,
+ typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
+ typename internal::traits<_Rhs>::StorageKind,
+ internal::product_type<_Lhs,_Rhs>::ret>::ret>
+{
+ public:
+ typedef _Lhs Lhs;
+ typedef _Rhs Rhs;
+ typedef typename ProductImpl<
+ Lhs, Rhs, Option,
+ typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+ typename internal::traits<Rhs>::StorageKind,
+ internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
+ typedef typename internal::ref_selector<Lhs>::type LhsNested;
+ typedef typename internal::ref_selector<Rhs>::type RhsNested;
+ typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+ typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+ EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
+ {
+ eigen_assert(lhs.cols() == rhs.rows()
+ && "invalid matrix product"
+ && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
+ }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+ EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
+ EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
+ protected:
+ LhsNested m_lhs;
+ RhsNested m_rhs;
+};
+namespace internal {
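+// dense_product_base: the generic case is an ordinary dense expression base;
+// the InnerProduct specialization additionally provides an implicit conversion
+// to Scalar so that 1x1 products (dot-product-like expressions) can be used
+// where a scalar is expected.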
+template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
+class dense_product_base
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{};
+template<typename Lhs, typename Rhs, int Option>
+class dense_product_base<Lhs, Rhs, Option, InnerProduct>
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{
+ typedef Product<Lhs,Rhs,Option> ProductXpr;
+ typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
+public:
+ using Base::derived;
+ typedef typename Base::Scalar Scalar;
+ operator const Scalar() const
+ {
+ return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
+ }
+};
+}
+template<typename Lhs, typename Rhs, int Option, typename StorageKind>
+class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
+{
+ public:
+ typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
+};
+template<typename Lhs, typename Rhs, int Option>
+class ProductImpl<Lhs,Rhs,Option,Dense>
+ : public internal::dense_product_base<Lhs,Rhs,Option>
+{
+ typedef Product<Lhs, Rhs, Option> Derived;
+ public:
+ typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+ protected:
+ enum {
+ IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) &&
+ (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
+ EnableCoeff = IsOneByOne || Option==LazyProduct
+ };
+ public:
+ EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
+ {
+ EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+ eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+ return internal::evaluator<Derived>(derived()).coeff(row,col);
+ }
+ EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
+ {
+ EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+ eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+ return internal::evaluator<Derived>(derived()).coeff(i);
+ }
+};
+}
+#endif
+// end #include "src/Core/Product.h"
+// #include "src/Core/CoreEvaluators.h"
+#ifndef EIGEN_COREEVALUATORS_H
+#define EIGEN_COREEVALUATORS_H
+namespace Eigen {
+namespace internal {
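+// Core evaluator machinery. Expression types describe *what* to compute;
+// evaluator<> specializations describe *how* to access coefficients and
+// packets. The unary/binary/ternary evaluators below are selected from the
+// operands' evaluator kinds (IndexBased for dense, IteratorBased for sparse).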
+template<typename StorageKind>
+struct storage_kind_to_evaluator_kind {
+ typedef IndexBased Kind;
+};
+template<typename StorageKind> struct storage_kind_to_shape;
+template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; };
+template<> struct storage_kind_to_shape<SolverStorage> { typedef SolverShape Shape; };
+template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; };
+template<> struct storage_kind_to_shape<TranspositionsStorage> { typedef TranspositionsShape Shape; };
+template< typename T,
+ typename Arg1Kind = typename evaluator_traits<typename T::Arg1>::Kind,
+ typename Arg2Kind = typename evaluator_traits<typename T::Arg2>::Kind,
+ typename Arg3Kind = typename evaluator_traits<typename T::Arg3>::Kind,
+ typename Arg1Scalar = typename traits<typename T::Arg1>::Scalar,
+ typename Arg2Scalar = typename traits<typename T::Arg2>::Scalar,
+ typename Arg3Scalar = typename traits<typename T::Arg3>::Scalar> struct ternary_evaluator;
+template< typename T,
+ typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind,
+ typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind,
+ typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+ typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator;
+template< typename T,
+ typename Kind = typename evaluator_traits<typename T::NestedExpression>::Kind,
+ typename Scalar = typename T::Scalar> struct unary_evaluator;
+template<typename T>
+struct evaluator_traits_base
+{
+ typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
+ typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
+};
+template<typename T>
+struct evaluator_traits : public evaluator_traits_base<T>
+{
+};
+template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
+struct evaluator_assume_aliasing {
+ static const bool value = false;
+};
+template<typename T>
+struct evaluator : public unary_evaluator<T>
+{
+ typedef unary_evaluator<T> Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {}
+};
+template<typename T>
+struct evaluator<const T>
+ : evaluator<T>
+{
+ EIGEN_DEVICE_FUNC
+ explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
+};
+template<typename ExpressionType>
+struct evaluator_base : public noncopyable
+{
+ typedef traits<ExpressionType> ExpressionTraits;
+ enum {
+ Alignment = 0
+ };
+};
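+// Evaluator of plain dense storage (Matrix/Array via PlainObjectBase):
+// coefficient access is a direct indexed load/store through m_data; only the
+// outer stride needs tracking, and variable_if_dynamic keeps it as a
+// compile-time constant for fixed-size types.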
+template<typename Derived>
+struct evaluator<PlainObjectBase<Derived> >
+ : evaluator_base<Derived>
+{
+ typedef PlainObjectBase<Derived> PlainObjectType;
+ typedef typename PlainObjectType::Scalar Scalar;
+ typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
+ enum {
+ IsRowMajor = PlainObjectType::IsRowMajor,
+ IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,
+ RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
+ ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
+ CoeffReadCost = NumTraits<Scalar>::ReadCost,
+ Flags = traits<Derived>::EvaluatorFlags,
+ Alignment = traits<Derived>::Alignment
+ };
+ EIGEN_DEVICE_FUNC evaluator()
+ : m_data(0),
+ m_outerStride(IsVectorAtCompileTime ? 0
+ : int(IsRowMajor) ? ColsAtCompileTime
+ : RowsAtCompileTime)
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m)
+ : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ if (IsRowMajor)
+ return m_data[row * m_outerStride.value() + col];
+ else
+ return m_data[row + col * m_outerStride.value()];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_data[index];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
+ {
+ if (IsRowMajor)
+ return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
+ else
+ return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
+ {
+ return const_cast<Scalar*>(m_data)[index];
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ if (IsRowMajor)
+ return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col);
+ else
+ return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value());
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ return ploadt<PacketType, LoadMode>(m_data + index);
+ }
+ template<int StoreMode,typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index row, Index col, const PacketType& x)
+ {
+ if (IsRowMajor)
+ return pstoret<Scalar, PacketType, StoreMode>
+ (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
+ else
+ return pstoret<Scalar, PacketType, StoreMode>
+ (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index index, const PacketType& x)
+ {
+ return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
+ }
+protected:
+ const Scalar *m_data;
+ variable_if_dynamic<Index, IsVectorAtCompileTime ? 0
+ : int(IsRowMajor) ? ColsAtCompileTime
+ : RowsAtCompileTime> m_outerStride;
+};
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+ : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+{
+ typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+ EIGEN_DEVICE_FUNC evaluator() {}
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
+ : evaluator<PlainObjectBase<XprType> >(m)
+ { }
+};
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+ : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+{
+ typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+ EIGEN_DEVICE_FUNC evaluator() {}
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
+ : evaluator<PlainObjectBase<XprType> >(m)
+ { }
+};
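+// Transpose is evaluated by swapping (row, col) before forwarding every
+// coefficient and packet access to the nested expression's evaluator.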
+template<typename ArgType>
+struct unary_evaluator<Transpose<ArgType>, IndexBased>
+ : evaluator_base<Transpose<ArgType> >
+{
+ typedef Transpose<ArgType> XprType;
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
+ Alignment = evaluator<ArgType>::Alignment
+ };
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_argImpl.coeff(col, row);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_argImpl.coeff(index);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_argImpl.coeffRef(col, row);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename XprType::Scalar& coeffRef(Index index)
+ {
+ return m_argImpl.coeffRef(index);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ return m_argImpl.template packet<LoadMode,PacketType>(col, row);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ return m_argImpl.template packet<LoadMode,PacketType>(index);
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index row, Index col, const PacketType& x)
+ {
+ m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index index, const PacketType& x)
+ {
+ m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
+ }
+protected:
+ evaluator<ArgType> m_argImpl;
+};
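+// nullary_wrapper routes a CwiseNullaryOp functor to whichever call operator
+// it actually implements: op() for constants, op(i) for linear generators
+// (e.g. LinSpaced), op(i,j) for coordinate-dependent ones. Selection happens
+// by partial specialization on the three detection flags; the all-false case
+// is an empty struct, so misuse fails to compile.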
+template<typename Scalar,typename NullaryOp,
+ bool has_nullary = has_nullary_operator<NullaryOp>::value,
+ bool has_unary = has_unary_operator<NullaryOp>::value,
+ bool has_binary = has_binary_operator<NullaryOp>::value>
+struct nullary_wrapper
+{
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+ template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }
+ template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
+};
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
+{
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
+ template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
+};
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
+{
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
+ template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
+};
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,true,false>
+{
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+ eigen_assert(i==0 || j==0);
+ return op(i+j);
+ }
+ template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+ eigen_assert(i==0 || j==0);
+ return op.template packetOp<T>(i+j);
+ }
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+ template <typename T, typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
+};
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};
+#if 0 && EIGEN_COMP_MSVC>0
+template<typename T> struct nullary_wrapper_workaround_msvc {
+ nullary_wrapper_workaround_msvc(const T&);
+ operator T()const;
+};
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
+{
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+ return nullary_wrapper<Scalar,NullaryOp,
+ has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+ has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+ has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);
+ }
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {
+ return nullary_wrapper<Scalar,NullaryOp,
+ has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+ has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+ has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);
+ }
+ template <typename T, typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+ return nullary_wrapper<Scalar,NullaryOp,
+ has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+ has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+ has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);
+ }
+ template <typename T, typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {
+ return nullary_wrapper<Scalar,NullaryOp,
+ has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+ has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+ has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);
+ }
+};
+#endif
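+// Evaluator of CwiseNullaryOp: coefficients come from the functor alone
+// (through nullary_wrapper above), so no nested expression is stored.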
+template<typename NullaryOp, typename PlainObjectType>
+struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+ : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+{
+ typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
+ typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
+ enum {
+ CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
+ Flags = (evaluator<PlainObjectTypeCleaned>::Flags
+ & ( HereditaryBits
+ | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
+ | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
+ | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
+ Alignment = AlignedMax
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
+ : m_functor(n.functor()), m_wrapper()
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(IndexType row, IndexType col) const
+ {
+ return m_wrapper(m_functor, row, col);
+ }
+ template <typename IndexType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(IndexType index) const
+ {
+ return m_wrapper(m_functor,index);
+ }
+ template<int LoadMode, typename PacketType, typename IndexType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(IndexType row, IndexType col) const
+ {
+ return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
+ }
+ template<int LoadMode, typename PacketType, typename IndexType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(IndexType index) const
+ {
+ return m_wrapper.template packetOp<PacketType>(m_functor, index);
+ }
+protected:
+ const NullaryOp m_functor;
+ const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
+};
+template<typename UnaryOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
+ : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> >
+{
+ typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+ Flags = evaluator<ArgType>::Flags
+ & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
+ Alignment = evaluator<ArgType>::Alignment
+ };
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit unary_evaluator(const XprType& op)
+ : m_functor(op.functor()),
+ m_argImpl(op.nestedExpression())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_functor(m_argImpl.coeff(row, col));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_functor(m_argImpl.coeff(index));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
+ }
+protected:
+ const UnaryOp m_functor;
+ evaluator<ArgType> m_argImpl;
+};
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
+struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+ : public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+{
+ typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
+ typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
+struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>
+ : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+{
+ typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
+ enum {
+ CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
+ Arg1Flags = evaluator<Arg1>::Flags,
+ Arg2Flags = evaluator<Arg2>::Flags,
+ Arg3Flags = evaluator<Arg3>::Flags,
+ SameType = is_same<typename Arg1::Scalar,typename Arg2::Scalar>::value && is_same<typename Arg1::Scalar,typename Arg3::Scalar>::value,
+ StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit),
+ Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & (
+ HereditaryBits
+ | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) &
+ ( (StorageOrdersAgree ? LinearAccessBit : 0)
+ | (functor_traits<TernaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+ )
+ )
+ ),
+ Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit),
+ Alignment = EIGEN_PLAIN_ENUM_MIN(
+ EIGEN_PLAIN_ENUM_MIN(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),
+ evaluator<Arg3>::Alignment)
+ };
+ EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_arg1Impl(xpr.arg1()),
+ m_arg2Impl(xpr.arg2()),
+ m_arg3Impl(xpr.arg3())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col),
+ m_arg2Impl.template packet<LoadMode,PacketType>(row, col),
+ m_arg3Impl.template packet<LoadMode,PacketType>(row, col));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index),
+ m_arg2Impl.template packet<LoadMode,PacketType>(index),
+ m_arg3Impl.template packet<LoadMode,PacketType>(index));
+ }
+protected:
+ const TernaryOp m_functor;
+ evaluator<Arg1> m_arg1Impl;
+ evaluator<Arg2> m_arg2Impl;
+ evaluator<Arg3> m_arg3Impl;
+};
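+// CwiseBinaryOp evaluation mirrors the ternary case: linear access is kept
+// only when both operands agree on storage order, and packet access
+// additionally requires matching scalar types and a vectorizable functor.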
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+ : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+ typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>
+ : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+ typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+ enum {
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+ LhsFlags = evaluator<Lhs>::Flags,
+ RhsFlags = evaluator<Rhs>::Flags,
+ SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
+ StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),
+ Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
+ HereditaryBits
+ | (int(LhsFlags) & int(RhsFlags) &
+ ( (StorageOrdersAgree ? LinearAccessBit : 0)
+ | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+ )
+ )
+ ),
+ Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
+ Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
+ };
+ EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
+ : m_functor(xpr.functor()),
+ m_lhsImpl(xpr.lhs()),
+ m_rhsImpl(xpr.rhs())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
+ m_rhsImpl.template packet<LoadMode,PacketType>(row, col));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
+ m_rhsImpl.template packet<LoadMode,PacketType>(index));
+ }
+protected:
+ const BinaryOp m_functor;
+ evaluator<Lhs> m_lhsImpl;
+ evaluator<Rhs> m_rhsImpl;
+};
+template<typename UnaryOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
+ : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
+{
+ typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+ Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
+ Alignment = 0
+ };
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
+ : m_unaryOp(op.functor()),
+ m_argImpl(op.nestedExpression())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_unaryOp(m_argImpl.coeff(row, col));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_unaryOp(m_argImpl.coeff(index));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_unaryOp(m_argImpl.coeffRef(row, col));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
+ {
+ return m_unaryOp(m_argImpl.coeffRef(index));
+ }
+protected:
+ const UnaryOp m_unaryOp;
+ evaluator<ArgType> m_argImpl;
+};
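+// mapbase_evaluator is shared by Map, Ref and direct-access Block: coefficients
+// are read through a raw pointer combined with inner/outer strides (held as
+// compile-time constants when known). Packet access requires an inner stride
+// fixed to 1, as enforced by the static assertion in the constructor.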
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator;
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator : evaluator_base<Derived>
+{
+ typedef Derived XprType;
+ typedef typename XprType::PointerType PointerType;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ enum {
+ IsRowMajor = XprType::RowsAtCompileTime,
+ ColsAtCompileTime = XprType::ColsAtCompileTime,
+ CoeffReadCost = NumTraits<Scalar>::ReadCost
+ };
+ EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
+ : m_data(const_cast<PointerType>(map.data())),
+ m_innerStride(map.innerStride()),
+ m_outerStride(map.outerStride())
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_data[col * colStride() + row * rowStride()];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_data[index * m_innerStride.value()];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_data[col * colStride() + row * rowStride()];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
+ {
+ return m_data[index * m_innerStride.value()];
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ PointerType ptr = m_data + row * rowStride() + col * colStride();
+ return internal::ploadt<PacketType, LoadMode>(ptr);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index row, Index col, const PacketType& x)
+ {
+ PointerType ptr = m_data + row * rowStride() + col * colStride();
+ return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index index, const PacketType& x)
+ {
+ internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
+ }
+protected:
+ EIGEN_DEVICE_FUNC
+ inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
+ EIGEN_DEVICE_FUNC
+ inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
+ PointerType m_data;
+ const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
+ const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
+};
+template<typename PlainObjectType, int MapOptions, typename StrideType>
+struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
+ : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
+{
+ typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type PacketScalar;
+ enum {
+ InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+ ? int(PlainObjectType::InnerStrideAtCompileTime)
+ : int(StrideType::InnerStrideAtCompileTime),
+ OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+ ? int(PlainObjectType::OuterStrideAtCompileTime)
+ : int(StrideType::OuterStrideAtCompileTime),
+ HasNoInnerStride = InnerStrideAtCompileTime == 1,
+ HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
+ HasNoStride = HasNoInnerStride && HasNoOuterStride,
+ IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+ PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit),
+ LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit),
+ Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask),
+ Alignment = int(MapOptions)&int(AlignedMask)
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
+ : mapbase_evaluator<XprType, PlainObjectType>(map)
+ { }
+};
+template<typename PlainObjectType, int RefOptions, typename StrideType>
+struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
+ : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
+{
+ typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
+ enum {
+ Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
+ Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
+ : mapbase_evaluator<XprType, PlainObjectType>(ref)
+ { }
+};
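+// Blocks have two evaluation paths: when the argument offers direct access,
+// the block is evaluated as a pointer-plus-strides map (mapbase_evaluator);
+// otherwise an index-based evaluator shifts every access by the block's
+// (startRow, startCol) offsets.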
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
+ bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+ : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
+{
+ typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename packet_traits<Scalar>::type PacketScalar;
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
+ ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,
+ ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0,
+ IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
+ : ArgTypeIsRowMajor,
+ HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor),
+ InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+ InnerStrideAtCompileTime = HasSameStorageOrderAsArgType
+ ? int(inner_stride_at_compile_time<ArgType>::ret)
+ : int(outer_stride_at_compile_time<ArgType>::ret),
+ OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
+ ? int(outer_stride_at_compile_time<ArgType>::ret)
+ : int(inner_stride_at_compile_time<ArgType>::ret),
+ MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
+ FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
+ FlagsRowMajorBit = XprType::Flags&RowMajorBit,
+ Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
+ DirectAccessBit |
+ MaskPacketAccessBit),
+ Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
+ PacketAlignment = unpacket_traits<PacketScalar>::alignment,
+ Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
+ Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
+ };
+ typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block)
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+};
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, false>
+ : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+{
+ typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+ EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
+ : unary_evaluator<XprType>(block)
+ {}
+};
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>
+ : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+{
+ typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
+ : m_argImpl(block.nestedExpression()),
+ m_startRow(block.startRow()),
+ m_startCol(block.startCol())
+ { }
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ enum {
+ RowsAtCompileTime = XprType::RowsAtCompileTime
+ };
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
+ {
+ return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0);
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index row, Index col, const PacketType& x)
+ {
+ return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index index, const PacketType& x)
+ {
+ return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0,
+ x);
+ }
+protected:
+ evaluator<ArgType> m_argImpl;
+ const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+ const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+};
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, true>
+ : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
+ typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
+{
+ typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+ typedef typename XprType::Scalar Scalar;
+ EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
+ : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
+ {
+ eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
+ }
+};
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
+struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+ : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+{
+ typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
+ enum {
+ CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
+ + EIGEN_PLAIN_ENUM_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
+ evaluator<ElseMatrixType>::CoeffReadCost),
+ Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
+ Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
+ : m_conditionImpl(select.conditionMatrix()),
+ m_thenImpl(select.thenMatrix()),
+ m_elseImpl(select.elseMatrix())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ if (m_conditionImpl.coeff(row, col))
+ return m_thenImpl.coeff(row, col);
+ else
+ return m_elseImpl.coeff(row, col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ if (m_conditionImpl.coeff(index))
+ return m_thenImpl.coeff(index);
+ else
+ return m_elseImpl.coeff(index);
+ }
+protected:
+ evaluator<ConditionMatrixType> m_conditionImpl;
+ evaluator<ThenMatrixType> m_thenImpl;
+ evaluator<ElseMatrixType> m_elseImpl;
+};
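+// Replicate evaluation folds indices back into the source expression with a
+// modulo on the source's rows/cols; the modulo is skipped at compile time
+// whenever the corresponding replication factor is 1.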
+template<typename ArgType, int RowFactor, int ColFactor>
+struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
+ : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> >
+{
+ typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ enum {
+ Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
+ };
+ typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;
+ typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+ enum {
+ CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
+ LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,
+ Flags = (evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
+ Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
+ };
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
+ : m_arg(replicate.nestedExpression()),
+ m_argImpl(m_arg),
+ m_rows(replicate.nestedExpression().rows()),
+ m_cols(replicate.nestedExpression().cols())
+ {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
+ : RowFactor==1 ? row
+ : row % m_rows.value();
+ const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
+ : ColFactor==1 ? col
+ : col % m_cols.value();
+ return m_argImpl.coeff(actual_row, actual_col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
+ ? (ColFactor==1 ? index : index%m_cols.value())
+ : (RowFactor==1 ? index : index%m_rows.value());
+ return m_argImpl.coeff(actual_index);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
+ : RowFactor==1 ? row
+ : row % m_rows.value();
+ const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
+ : ColFactor==1 ? col
+ : col % m_cols.value();
+ return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
+ ? (ColFactor==1 ? index : index%m_cols.value())
+ : (RowFactor==1 ? index : index%m_rows.value());
+ return m_argImpl.template packet<LoadMode,PacketType>(actual_index);
+ }
+protected:
+ const ArgTypeNested m_arg;
+ evaluator<ArgTypeNestedCleaned> m_argImpl;
+ const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
+ const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
+};
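+// PartialReduxExpr (colwise()/rowwise() reductions): each coefficient re-runs
+// the member functor over an entire column or row, hence the HugeCost estimate
+// for dynamic traversal sizes, which strongly discourages re-reading such an
+// expression when it is nested inside a larger one.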
+template< typename ArgType, typename MemberOp, int Direction>
+struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
+ : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
+{
+ typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
+ typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
+ typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+ typedef typename ArgType::Scalar InputScalar;
+ typedef typename XprType::Scalar Scalar;
+ enum {
+ TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
+ };
+ typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
+ enum {
+ CoeffReadCost = TraversalSize==Dynamic ? HugeCost
+ : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
+ Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
+ Alignment = 0
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
+ : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value));
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index i, Index j) const
+ {
+ if (Direction==Vertical)
+ return m_functor(m_arg.col(j));
+ else
+ return m_functor(m_arg.row(i));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index index) const
+ {
+ if (Direction==Vertical)
+ return m_functor(m_arg.col(index));
+ else
+ return m_functor(m_arg.row(index));
+ }
+protected:
+ typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
+ const MemberOp m_functor;
+};
+template<typename XprType>
+struct evaluator_wrapper_base
+ : evaluator_base<XprType>
+{
+ typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = evaluator<ArgType>::Flags,
+ Alignment = evaluator<ArgType>::Alignment
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
+ typedef typename ArgType::Scalar Scalar;
+ typedef typename ArgType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_argImpl.coeff(row, col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_argImpl.coeff(index);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_argImpl.coeffRef(row, col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
+ {
+ return m_argImpl.coeffRef(index);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ return m_argImpl.template packet<LoadMode,PacketType>(row, col);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ return m_argImpl.template packet<LoadMode,PacketType>(index);
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index row, Index col, const PacketType& x)
+ {
+ m_argImpl.template writePacket<StoreMode>(row, col, x);
+ }
+ template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index index, const PacketType& x)
+ {
+ m_argImpl.template writePacket<StoreMode>(index, x);
+ }
+protected:
+ evaluator<ArgType> m_argImpl;
+};
+template<typename TArgType>
+struct unary_evaluator<ArrayWrapper<TArgType> >
+ : evaluator_wrapper_base<ArrayWrapper<TArgType> >
+{
+ typedef ArrayWrapper<TArgType> XprType;
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
+ : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
+ { }
+};
+template<typename PacketType, bool ReversePacket> struct reverse_packet_cond;
+template<typename ArgType, int Direction>
+struct unary_evaluator<Reverse<ArgType, Direction> >
+ : evaluator_base<Reverse<ArgType, Direction> >
+{
+ typedef Reverse<ArgType, Direction> XprType;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ enum {
+ IsRowMajor = XprType::IsRowMajor,
+ IsColMajor = !IsRowMajor,
+ ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
+ ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
+ ReversePacket = (Direction == BothDirections)
+ || ((Direction == Vertical) && IsColMajor)
+ || ((Direction == Horizontal) && IsRowMajor),
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags0 = evaluator<ArgType>::Flags,
+ LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
+ || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))
+ ? LinearAccessBit : 0,
+ Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
+ Alignment = 0
+ };
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
+ : m_argImpl(reverse.nestedExpression()),
+ m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1),
+ m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
+ { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
+ {
+ return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
+ ReverseCol ? m_cols.value() - col - 1 : col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
+ {
+ return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
+ ReverseCol ? m_cols.value() - col - 1 : col);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
+ {
+ return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index row, Index col) const
+ {
+ enum {
+ PacketSize = unpacket_traits<PacketType>::size,
+ OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
+ OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
+ };
+ typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
+ return reverse_packet::run(m_argImpl.template packet<LoadMode,PacketType>(
+ ReverseRow ? m_rows.value() - row - OffsetRow : row,
+ ReverseCol ? m_cols.value() - col - OffsetCol : col));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ PacketType packet(Index index) const
+ {
+ enum { PacketSize = unpacket_traits<PacketType>::size };
+ return preverse(m_argImpl.template packet<LoadMode,PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index row, Index col, const PacketType& x)
+ {
+ enum {
+ PacketSize = unpacket_traits<PacketType>::size,
+ OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
+ OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
+ };
+ typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
+ m_argImpl.template writePacket<LoadMode>(
+ ReverseRow ? m_rows.value() - row - OffsetRow : row,
+ ReverseCol ? m_cols.value() - col - OffsetCol : col,
+ reverse_packet::run(x));
+ }
+ template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
+ void writePacket(Index index, const PacketType& x)
+ {
+ enum { PacketSize = unpacket_traits<PacketType>::size };
+ m_argImpl.template writePacket<LoadMode>
+ (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
+ }
+protected:
+ evaluator<ArgType> m_argImpl;
+ const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 1> m_rows;
+ const variable_if_dynamic<Index, ReverseCol ? ArgType::ColsAtCompileTime : 1> m_cols;
+};
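+// Evaluator for Diagonal expressions: coefficient i of the diagonal maps to
+// coefficient (i + rowOffset(), i + colOffset()) of the nested expression,
+// with the offsets derived from the (possibly dynamic) diagonal index.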
+template<typename ArgType, int DiagIndex>
+struct evaluator<Diagonal<ArgType, DiagIndex> >
+ : evaluator_base<Diagonal<ArgType, DiagIndex> >
+{
+ typedef Diagonal<ArgType, DiagIndex> XprType;
+ enum {
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit,
+ Alignment = 0
+ };
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
+ : m_argImpl(diagonal.nestedExpression()),
+ m_index(diagonal.index())
+ { }
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index) const
+ {
+ return m_argImpl.coeff(row + rowOffset(), row + colOffset());
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_argImpl.coeff(index + rowOffset(), index + colOffset());
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index)
+ {
+ return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
+ {
+ return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
+ }
+protected:
+ evaluator<ArgType> m_argImpl;
+ const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
+private:
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
+};
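+// EvalToTemp wraps an expression so that its evaluator (below) forces
+// evaluation into a temporary PlainObject; the wrapper itself only forwards
+// rows(), cols() and a reference to the wrapped expression.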
+template<typename ArgType> class EvalToTemp;
+template<typename ArgType>
+struct traits<EvalToTemp<ArgType> >
+ : public traits<ArgType>
+{ };
+template<typename ArgType>
+class EvalToTemp
+ : public dense_xpr_base<EvalToTemp<ArgType> >::type
+{
+ public:
+ typedef typename dense_xpr_base<EvalToTemp>::type Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
+ explicit EvalToTemp(const ArgType& arg)
+ : m_arg(arg)
+ { }
+ const ArgType& arg() const
+ {
+ return m_arg;
+ }
+ Index rows() const
+ {
+ return m_arg.rows();
+ }
+ Index cols() const
+ {
+ return m_arg.cols();
+ }
+ private:
+ const ArgType& m_arg;
+};
+template<typename ArgType>
+struct evaluator<EvalToTemp<ArgType> >
+ : public evaluator<typename ArgType::PlainObject>
+{
+ typedef EvalToTemp<ArgType> XprType;
+ typedef typename ArgType::PlainObject PlainObject;
+ typedef evaluator<PlainObject> Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : m_result(xpr.arg())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ }
+ EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)
+ : m_result(arg)
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ }
+protected:
+ PlainObject m_result;
+};
+}
+}
+#endif
+// end #include "src/Core/CoreEvaluators.h"
+// #include "src/Core/AssignEvaluator.h"
+#ifndef EIGEN_ASSIGN_EVALUATOR_H
+#define EIGEN_ASSIGN_EVALUATOR_H
+namespace Eigen {
+namespace internal {
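+// copy_using_evaluator_traits computes, at compile time, how an assignment
+// dst = func(src) should be traversed (default, linear, inner-, linear- or
+// slice-vectorized) and unrolled, based on the flags, sizes and alignments
+// of the two evaluators and on the packet types selected below.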
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
+struct copy_using_evaluator_traits
+{
+ typedef typename DstEvaluator::XprType Dst;
+ typedef typename Dst::Scalar DstScalar;
+ enum {
+ DstFlags = DstEvaluator::Flags,
+ SrcFlags = SrcEvaluator::Flags
+ };
+public:
+ enum {
+ DstAlignment = DstEvaluator::Alignment,
+ SrcAlignment = SrcEvaluator::Alignment,
+ DstHasDirectAccess = DstFlags & DirectAccessBit,
+ JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
+ };
+private:
+ enum {
+ InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
+ : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
+ : int(Dst::RowsAtCompileTime),
+ InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
+ : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
+ : int(Dst::MaxRowsAtCompileTime),
+ OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
+ MaxSizeAtCompileTime = Dst::SizeAtCompileTime
+ };
+ typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
+ typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
+ enum {
+ LinearPacketSize = unpacket_traits<LinearPacketType>::size,
+ InnerPacketSize = unpacket_traits<InnerPacketType>::size
+ };
+public:
+ enum {
+ LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
+ InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
+ };
+private:
+ enum {
+ DstIsRowMajor = DstFlags&RowMajorBit,
+ SrcIsRowMajor = SrcFlags&RowMajorBit,
+ StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
+ MightVectorize = bool(StorageOrdersAgree)
+ && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
+ && bool(functor_traits<AssignFunc>::PacketAccess),
+ MayInnerVectorize = MightVectorize
+ && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
+ && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
+ && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
+ MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
+ MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
+ && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
+ MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
+ && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
+ };
+public:
+ enum {
+ Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
+ : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
+ : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(MayLinearize) ? int(LinearTraversal)
+ : int(DefaultTraversal),
+ Vectorized = int(Traversal) == InnerVectorizedTraversal
+ || int(Traversal) == LinearVectorizedTraversal
+ || int(Traversal) == SliceVectorizedTraversal
+ };
+ typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
+private:
+ enum {
+ ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
+ : Vectorized ? InnerPacketSize
+ : 1,
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
+ MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
+ && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
+ MayUnrollInner = int(InnerSize) != Dynamic
+ && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
+ };
+public:
+ enum {
+ Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+ ? (
+ int(MayUnrollCompletely) ? int(CompleteUnrolling)
+ : int(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling)
+ )
+ : int(Traversal) == int(LinearVectorizedTraversal)
+ ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
+ ? int(CompleteUnrolling)
+ : int(NoUnrolling) )
+ : int(Traversal) == int(LinearTraversal)
+ ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
+ : int(NoUnrolling) )
+#if EIGEN_UNALIGNED_VECTORIZE
+ : int(Traversal) == int(SliceVectorizedTraversal)
+ ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling) )
+#endif
+ : int(NoUnrolling)
+ };
+#ifdef EIGEN_DEBUG_ASSIGN
+ static void debug()
+ {
+ std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
+ std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
+ std::cerr.setf(std::ios::hex, std::ios::basefield);
+ std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
+ std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
+ std::cerr.unsetf(std::ios::hex);
+ EIGEN_DEBUG_VAR(DstAlignment)
+ EIGEN_DEBUG_VAR(SrcAlignment)
+ EIGEN_DEBUG_VAR(LinearRequiredAlignment)
+ EIGEN_DEBUG_VAR(InnerRequiredAlignment)
+ EIGEN_DEBUG_VAR(JointAlignment)
+ EIGEN_DEBUG_VAR(InnerSize)
+ EIGEN_DEBUG_VAR(InnerMaxSize)
+ EIGEN_DEBUG_VAR(LinearPacketSize)
+ EIGEN_DEBUG_VAR(InnerPacketSize)
+ EIGEN_DEBUG_VAR(ActualPacketSize)
+ EIGEN_DEBUG_VAR(StorageOrdersAgree)
+ EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearize)
+ EIGEN_DEBUG_VAR(MayInnerVectorize)
+ EIGEN_DEBUG_VAR(MayLinearVectorize)
+ EIGEN_DEBUG_VAR(MaySliceVectorize)
+ std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
+ EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
+ EIGEN_DEBUG_VAR(UnrollingLimit)
+ EIGEN_DEBUG_VAR(MayUnrollCompletely)
+ EIGEN_DEBUG_VAR(MayUnrollInner)
+ std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
+ std::cerr << std::endl;
+ }
+#endif
+};
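+// The copy_using_evaluator_*_Unrolling helpers below implement compile-time
+// recursion over coefficient (or packet) indices; the Index == Stop
+// specializations terminate the recursion with an empty run().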
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+{
+ typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+ typedef typename DstEvaluatorType::XprType DstXprType;
+ enum {
+ outer = Index / DstXprType::InnerSizeAtCompileTime,
+ inner = Index % DstXprType::InnerSizeAtCompileTime
+ };
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
+ }
+};
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+template<typename Kernel, int Index_, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
+ {
+ kernel.assignCoeffByOuterInner(outer, Index_);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
+ }
+};
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
+};
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
+ {
+ kernel.assignCoeff(Index);
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
+ }
+};
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling
+{
+ typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+ typedef typename DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ outer = Index / DstXprType::InnerSizeAtCompileTime,
+ inner = Index % DstXprType::InnerSizeAtCompileTime,
+ SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+ DstAlignment = Kernel::AssignmentTraits::DstAlignment
+ };
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
+ enum { NextIndex = Index + unpacket_traits<PacketType>::size };
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
+ }
+};
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
+struct copy_using_evaluator_innervec_InnerUnrolling
+{
+ typedef typename Kernel::PacketType PacketType;
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
+ {
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
+ enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
+ }
+};
+template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
+struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
+};
+template<typename Kernel,
+ int Traversal = Kernel::AssignmentTraits::Traversal,
+ int Unrolling = Kernel::AssignmentTraits::Unrolling>
+struct dense_assignment_loop;
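+// dense_assignment_loop is specialized on the (Traversal, Unrolling) pair
+// chosen by copy_using_evaluator_traits; each specialization drives the
+// assignment kernel with the corresponding loop structure.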
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
+{
+ EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
+ {
+ for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
+ for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ }
+ }
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ const Index outerSize = kernel.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
+ }
+};
+template <bool IsAligned = false>
+struct unaligned_dense_assignment_loop
+{
+ template <typename Kernel>
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
+};
+template <>
+struct unaligned_dense_assignment_loop<false>
+{
+#if EIGEN_COMP_MSVC
+ template <typename Kernel>
+ static EIGEN_DONT_INLINE void run(Kernel &kernel,
+ Index start,
+ Index end)
+#else
+ template <typename Kernel>
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
+ Index start,
+ Index end)
+#endif
+ {
+ for (Index index = start; index < end; ++index)
+ kernel.assignCoeff(index);
+ }
+};
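+// Linear vectorized traversal: assign the unaligned [0, alignedStart)
+// prefix coefficient by coefficient, process aligned packets over
+// [alignedStart, alignedEnd), then finish the scalar tail up to size.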
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ const Index size = kernel.size();
+ typedef typename Kernel::Scalar Scalar;
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
+ packetSize = unpacket_traits<PacketType>::size,
+ dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
+ dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
+ : int(Kernel::AssignmentTraits::DstAlignment),
+ srcAlignment = Kernel::AssignmentTraits::JointAlignment
+ };
+ const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
+ const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
+ unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
+ for(Index index = alignedStart; index < alignedEnd; index += packetSize)
+ kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
+ unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+ enum { size = DstXprType::SizeAtCompileTime,
+           packetSize = unpacket_traits<PacketType>::size,
+ alignedSize = (size/packetSize)*packetSize };
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
+ copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
+{
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+ DstAlignment = Kernel::AssignmentTraits::DstAlignment
+ };
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ const Index packetSize = unpacket_traits<PacketType>::size;
+ for(Index outer = 0; outer < outerSize; ++outer)
+ for(Index inner = 0; inner < innerSize; inner+=packetSize)
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::AssignmentTraits Traits;
+ const Index outerSize = kernel.outerSize();
+ for(Index outer = 0; outer < outerSize; ++outer)
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
+ Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ const Index size = kernel.size();
+ for(Index i = 0; i < size; ++i)
+ kernel.assignCoeff(i);
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
+};
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::Scalar Scalar;
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ packetSize = unpacket_traits<PacketType>::size,
+ requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
+ alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
+ dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
+ dstAlignment = alignable ? int(requestedAlignment)
+ : int(Kernel::AssignmentTraits::DstAlignment)
+ };
+ const Scalar *dst_ptr = kernel.dstDataPtr();
+ if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
+ {
+ return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
+ }
+ const Index packetAlignedMask = packetSize - 1;
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
+ Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
+ for(Index outer = 0; outer < outerSize; ++outer)
+ {
+ const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+ for(Index inner = 0; inner<alignedStart ; ++inner)
+ kernel.assignCoeffByOuterInner(outer, inner);
+ for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+ kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
+ for(Index inner = alignedEnd; inner<innerSize ; ++inner)
+ kernel.assignCoeffByOuterInner(outer, inner);
+ alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
+ }
+ }
+};
+#if EIGEN_UNALIGNED_VECTORIZE
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
+{
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+ enum { size = DstXprType::InnerSizeAtCompileTime,
+           packetSize = unpacket_traits<PacketType>::size,
+ vectorizableSize = (size/packetSize)*packetSize };
+ for(Index outer = 0; outer < kernel.outerSize(); ++outer)
+ {
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
+ }
+ }
+};
+#endif
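+// generic_dense_assignment_kernel bundles the destination and source
+// evaluators with the assignment functor; the dense_assignment_loop
+// specializations above interact only with this interface (assignCoeff,
+// assignPacket and their outer/inner-indexed variants).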
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
+class generic_dense_assignment_kernel
+{
+protected:
+ typedef typename DstEvaluatorTypeT::XprType DstXprType;
+ typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
+public:
+ typedef DstEvaluatorTypeT DstEvaluatorType;
+ typedef SrcEvaluatorTypeT SrcEvaluatorType;
+ typedef typename DstEvaluatorType::Scalar Scalar;
+ typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
+ typedef typename AssignmentTraits::PacketType PacketType;
+ EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+ : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
+ {
+ #ifdef EIGEN_DEBUG_ASSIGN
+ AssignmentTraits::debug();
+ #endif
+ }
+ EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
+ EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
+ EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
+ EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
+ EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
+ EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
+ EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
+ {
+ m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
+ {
+ m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
+ {
+ Index row = rowIndexByOuterInner(outer, inner);
+ Index col = colIndexByOuterInner(outer, inner);
+ assignCoeff(row, col);
+ }
+ template<int StoreMode, int LoadMode, typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
+ {
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
+ }
+ template<int StoreMode, int LoadMode, typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
+ {
+ m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
+ }
+ template<int StoreMode, int LoadMode, typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
+ {
+ Index row = rowIndexByOuterInner(outer, inner);
+ Index col = colIndexByOuterInner(outer, inner);
+ assignPacket<StoreMode,LoadMode,PacketType>(row, col);
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
+ {
+ typedef typename DstEvaluatorType::ExpressionTraits Traits;
+ return int(Traits::RowsAtCompileTime) == 1 ? 0
+ : int(Traits::ColsAtCompileTime) == 1 ? inner
+ : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
+ : inner;
+ }
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
+ {
+ typedef typename DstEvaluatorType::ExpressionTraits Traits;
+ return int(Traits::ColsAtCompileTime) == 1 ? 0
+ : int(Traits::RowsAtCompileTime) == 1 ? inner
+ : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
+ : outer;
+ }
+ EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
+ {
+ return m_dstExpr.data();
+ }
+protected:
+ DstEvaluatorType& m_dst;
+ const SrcEvaluatorType& m_src;
+ const Functor &m_functor;
+ DstXprType& m_dstExpr;
+};
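+// resize_if_allowed: a plain assign_op may resize the destination to match
+// the source; for any other functor (e.g. add_assign_op) the sizes must
+// already agree and are merely asserted.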
+template<typename DstXprType,typename SrcXprType, typename Functor>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &)
+{
+ EIGEN_ONLY_USED_FOR_DEBUG(dst);
+ EIGEN_ONLY_USED_FOR_DEBUG(src);
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+}
+template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &)
+{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
+ dst.resize(dstRows, dstCols);
+ eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
+}
+template<typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
+{
+ typedef evaluator<DstXprType> DstEvaluatorType;
+ typedef evaluator<SrcXprType> SrcEvaluatorType;
+ SrcEvaluatorType srcEvaluator(src);
+ resize_if_allowed(dst, src, func);
+ DstEvaluatorType dstEvaluator(dst);
+ typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+ dense_assignment_loop<Kernel>::run(kernel);
+}
+template<typename DstXprType, typename SrcXprType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
+{
+ call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
+}
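+// AssignmentKind selects an Assignment specialization from the shapes of
+// the two expressions: dense-to-dense assignments take the direct loop
+// below, everything else goes through the generic EigenBase2EigenBase path.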
+template<typename DstShape, typename SrcShape> struct AssignmentKind;
+struct Dense2Dense {};
+struct EigenBase2EigenBase {};
+template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
+template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
+template< typename DstXprType, typename SrcXprType, typename Functor,
+ typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
+ typename EnableIf = void>
+struct Assignment;
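+// call_assignment is the entry point used by operator= and friends: when
+// the source is assumed to alias the destination (evaluator_assume_aliasing,
+// e.g. for products) it first evaluates the source into a temporary,
+// otherwise it forwards directly to call_assignment_no_alias.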
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src)
+{
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(const Dst& dst, const Src& src)
+{
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+{
+ typename plain_matrix_type<Src>::type tmp(src);
+ call_assignment_no_alias(dst, tmp, func);
+}
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+{
+ call_assignment_no_alias(dst, src, func);
+}
+template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+{
+ call_assignment_no_alias(dst.expression(), src, func);
+}
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+{
+ enum {
+ NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
+ || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
+ ) && int(Dst::SizeAtCompileTime) != 1
+ };
+ typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
+ typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
+ ActualDstType actualDst(dst);
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
+ Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src)
+{
+ call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
+{
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
+ Assignment<Dst,Src,Func>::run(dst, src, func);
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
+{
+ call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ {
+#ifndef EIGEN_NO_DEBUG
+ internal::check_for_aliasing(dst, src);
+#endif
+ call_dense_assignment_loop(dst, src, func);
+ }
+};
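+// Generic EigenBase-to-EigenBase assignment: resize the destination, then
+// let the source evaluate itself via evalTo/addTo/subTo for =, += and -=.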
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
+struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
+ {
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ src.evalTo(dst);
+ }
+ template<typename SrcScalarType>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &)
+ {
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ src.addTo(dst);
+ }
+ template<typename SrcScalarType>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &)
+ {
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ src.subTo(dst);
+ }
+};
+}
+}
+#endif
+// end #include "src/Core/AssignEvaluator.h"
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+// #include "src/Core/Assign.h"
+#ifndef EIGEN_ASSIGN_H
+#define EIGEN_ASSIGN_H
+namespace Eigen {
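+// The DenseBase/MatrixBase operator= overloads below all forward to
+// internal::call_assignment, which handles potential aliasing; lazyAssign
+// skips that check and assigns without an intermediate temporary.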
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
+ ::lazyAssign(const DenseBase<OtherDerived>& other)
+{
+ enum{
+ SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
+ };
+ EIGEN_STATIC_ASSERT_LVALUE(Derived)
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+ eigen_assert(rows() == other.rows() && cols() == other.cols());
+ internal::call_assignment_no_alias(derived(),other.derived());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
+{
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+ other.derived().evalTo(derived());
+ return derived();
+}
+}
+#endif
+// end #include "src/Core/Assign.h"
+#endif
+// #include "src/Core/ArrayBase.h"
+#ifndef EIGEN_ARRAYBASE_H
+#define EIGEN_ARRAYBASE_H
+namespace Eigen {
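+// ArrayBase is the base class of all coefficient-wise array expressions;
+// it extends DenseBase with the component-wise operators and math functions
+// (abs, exp, sin, comparisons, ...) pulled in from the plugin headers below.
+// A rough usage sketch (illustrative only):
+//   Eigen::ArrayXf a(3), b(3);
+//   a << 1, 2, 3;  b << 4, 5, 6;
+//   Eigen::ArrayXf c = (a * b).exp();  // coefficient-wise product, then exp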
+template<typename Derived> class ArrayBase
+ : public DenseBase<Derived>
+{
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef ArrayBase StorageBaseType;
+ typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef DenseBase<Derived> Base;
+ using Base::RowsAtCompileTime;
+ using Base::ColsAtCompileTime;
+ using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::derived;
+ using Base::const_cast_derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::operator=;
+ using Base::operator+=;
+ using Base::operator-=;
+ using Base::operator*=;
+ using Base::operator/=;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ typedef typename Base::PlainObject PlainObject;
+ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+#endif
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
+// # include "../plugins/CommonCwiseUnaryOps.h"
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
+ const Derived&
+ >::type ConjugateReturnType;
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
+ const Derived&
+ >::type RealReturnType;
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
+ Derived&
+ >::type NonConstRealReturnType;
+typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;
+typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
+typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
+#endif
+EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
+EIGEN_DEVICE_FUNC
+inline const NegativeReturnType
+operator-() const { return NegativeReturnType(derived()); }
+template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
+EIGEN_DOC_UNARY_ADDONS(cast,conversion function)
+template<typename NewType>
+EIGEN_DEVICE_FUNC
+typename CastXpr<NewType>::Type
+cast() const
+{
+ return typename CastXpr<NewType>::Type(derived());
+}
+EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
+EIGEN_DEVICE_FUNC
+inline ConjugateReturnType
+conjugate() const
+{
+ return ConjugateReturnType(derived());
+}
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+EIGEN_DEVICE_FUNC
+inline RealReturnType
+real() const { return RealReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+EIGEN_DEVICE_FUNC
+inline const ImagReturnType
+imag() const { return ImagReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)
+template<typename CustomUnaryOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
+unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
+{
+ return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
+}
+EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)
+template<typename CustomViewOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryView<CustomViewOp, const Derived>
+unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
+{
+ return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
+}
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+EIGEN_DEVICE_FUNC
+inline NonConstRealReturnType
+real() { return NonConstRealReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+EIGEN_DEVICE_FUNC
+inline NonConstImagReturnType
+imag() { return NonConstImagReturnType(derived()); }
+// end # include "../plugins/CommonCwiseUnaryOps.h"
+// # include "../plugins/MatrixCwiseUnaryOps.h"
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbsReturnType
+cwiseAbs() const { return CwiseAbsReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
+cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)
+EIGEN_DEVICE_FUNC
+inline const CwiseSqrtReturnType
+cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)
+EIGEN_DEVICE_FUNC
+inline const CwiseSignReturnType
+cwiseSign() const { return CwiseSignReturnType(derived()); }
+EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)
+EIGEN_DEVICE_FUNC
+inline const CwiseInverseReturnType
+cwiseInverse() const { return CwiseInverseReturnType(derived()); }
+// end # include "../plugins/MatrixCwiseUnaryOps.h"
+// # include "../plugins/ArrayCwiseUnaryOps.h"
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> AbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_arg_op<Scalar>, const Derived> ArgReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> Abs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> SqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived> RsqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> SignReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> InverseReturnType;
+typedef CwiseUnaryOp<internal::scalar_boolean_not_op<Scalar>, const Derived> BooleanNotReturnType;
+typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType;
+typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType;
+typedef CwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> Log1pReturnType;
+typedef CwiseUnaryOp<internal::scalar_log10_op<Scalar>, const Derived> Log10ReturnType;
+typedef CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived> CosReturnType;
+typedef CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived> SinReturnType;
+typedef CwiseUnaryOp<internal::scalar_tan_op<Scalar>, const Derived> TanReturnType;
+typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturnType;
+typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;
+typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
+typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
+typedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType;
+typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
+typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
+typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
+typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
+typedef CwiseUnaryOp<internal::scalar_round_op<Scalar>, const Derived> RoundReturnType;
+typedef CwiseUnaryOp<internal::scalar_floor_op<Scalar>, const Derived> FloorReturnType;
+typedef CwiseUnaryOp<internal::scalar_ceil_op<Scalar>, const Derived> CeilReturnType;
+typedef CwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived> IsNaNReturnType;
+typedef CwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived> IsInfReturnType;
+typedef CwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived> IsFiniteReturnType;
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const AbsReturnType
+abs() const
+{
+ return AbsReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const ArgReturnType
+arg() const
+{
+ return ArgReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const Abs2ReturnType
+abs2() const
+{
+ return Abs2ReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const ExpReturnType
+exp() const
+{
+ return ExpReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const LogReturnType
+log() const
+{
+ return LogReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const Log1pReturnType
+log1p() const
+{
+ return Log1pReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const Log10ReturnType
+log10() const
+{
+ return Log10ReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SqrtReturnType
+sqrt() const
+{
+ return SqrtReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const RsqrtReturnType
+rsqrt() const
+{
+ return RsqrtReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SignReturnType
+sign() const
+{
+ return SignReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const CosReturnType
+cos() const
+{
+ return CosReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SinReturnType
+sin() const
+{
+ return SinReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const TanReturnType
+tan() const
+{
+ return TanReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const AtanReturnType
+atan() const
+{
+ return AtanReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const AcosReturnType
+acos() const
+{
+ return AcosReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const AsinReturnType
+asin() const
+{
+ return AsinReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const TanhReturnType
+tanh() const
+{
+ return TanhReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SinhReturnType
+sinh() const
+{
+ return SinhReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const CoshReturnType
+cosh() const
+{
+ return CoshReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const LogisticReturnType
+logistic() const
+{
+ return LogisticReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const InverseReturnType
+inverse() const
+{
+ return InverseReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const SquareReturnType
+square() const
+{
+ return SquareReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const CubeReturnType
+cube() const
+{
+ return CubeReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const RoundReturnType
+round() const
+{
+ return RoundReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const FloorReturnType
+floor() const
+{
+ return FloorReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const CeilReturnType
+ceil() const
+{
+ return CeilReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const IsNaNReturnType
+isNaN() const
+{
+ return IsNaNReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const IsInfReturnType
+isInf() const
+{
+ return IsInfReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const IsFiniteReturnType
+isFinite() const
+{
+ return IsFiniteReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const BooleanNotReturnType
+operator!() const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return BooleanNotReturnType(derived());
+}
+typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
+typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
+EIGEN_DEVICE_FUNC
+inline const LgammaReturnType
+lgamma() const
+{
+ return LgammaReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const DigammaReturnType
+digamma() const
+{
+ return DigammaReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const ErfReturnType
+erf() const
+{
+ return ErfReturnType(derived());
+}
+EIGEN_DEVICE_FUNC
+inline const ErfcReturnType
+erfc() const
+{
+ return ErfcReturnType(derived());
+}
+// end # include "../plugins/ArrayCwiseUnaryOps.h"
+// # include "../plugins/CommonCwiseBinaryOps.h"
+EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)
+EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)
+template<typename CustomBinaryOp, typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
+binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
+{
+ return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);
+}
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
+#endif
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
+operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
+operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+// end # include "../plugins/CommonCwiseBinaryOps.h"
+// # include "../plugins/MatrixCwiseBinaryOps.h"
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
+cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMin(const Scalar &other) const
+{
+ return cwiseMin(Derived::Constant(rows(), cols(), other));
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMax(const Scalar &other) const
+{
+ return cwiseMax(Derived::Constant(rows(), cols(), other));
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
+cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
+EIGEN_DEVICE_FUNC
+inline const CwiseScalarEqualReturnType
+cwiseEqual(const Scalar& s) const
+{
+ return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());
+}
+// end # include "../plugins/MatrixCwiseBinaryOps.h"
+// # include "../plugins/ArrayCwiseBinaryOps.h"
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
+operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
+}
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>
+operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ return CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+EIGEN_MAKE_CWISE_BINARY_OP(min,min)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+min
+#else
+(min)
+#endif
+(const Scalar &other) const
+{
+ return (min)(Derived::PlainObject::Constant(rows(), cols(), other));
+}
+EIGEN_MAKE_CWISE_BINARY_OP(max,max)
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+max
+#else
+(max)
+#endif
+(const Scalar &other) const
+{
+ return (max)(Derived::PlainObject::Constant(rows(), cols(), other));
+}
+EIGEN_MAKE_CWISE_BINARY_OP(pow,pow)
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;
+#endif
+#define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
+template<typename OtherDerived> \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
+OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+{ \
+ return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
+}\
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \
+OP(const Scalar& s) const { \
+ return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \
+} \
+EIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE const RCmp ## COMPARATOR ## ReturnType \
+OP(const Scalar& s, const Derived& d) { \
+ return Derived::PlainObject::Constant(d.rows(), d.cols(), s).OP(d); \
+}
+#define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \
+template<typename OtherDerived> \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived> \
+OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+{ \
+ return CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived>(other.derived(), derived()); \
+} \
+EIGEN_DEVICE_FUNC \
+inline const RCmp ## RCOMPARATOR ## ReturnType \
+OP(const Scalar& s) const { \
+ return Derived::PlainObject::Constant(rows(), cols(), s).R_OP(*this); \
+} \
+friend inline const Cmp ## RCOMPARATOR ## ReturnType \
+OP(const Scalar& s, const Derived& d) { \
+ return d.R_OP(Derived::PlainObject::Constant(d.rows(), d.cols(), s)); \
+}
+EIGEN_MAKE_CWISE_COMP_OP(operator<, LT)
+EIGEN_MAKE_CWISE_COMP_OP(operator<=, LE)
+EIGEN_MAKE_CWISE_COMP_R_OP(operator>, operator<, LT)
+EIGEN_MAKE_CWISE_COMP_R_OP(operator>=, operator<=, LE)
+EIGEN_MAKE_CWISE_COMP_OP(operator==, EQ)
+EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ)
+#undef EIGEN_MAKE_CWISE_COMP_OP
+#undef EIGEN_MAKE_CWISE_COMP_R_OP
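+// The two macros above (now #undef'd) generated the full family of
+// coefficient-wise comparison operators. The _R_OP variant derives
+// operator> and operator>= by swapping the operands of operator< and
+// operator<=, so only LT/LE/EQ/NEQ comparator functors are needed.
+// A hedged usage sketch (assuming ArrayXf a, b of equal size):
+//   (a < b)      // element-wise boolean expression
+//   (a >= 0.5f)  // compares every coefficient against a constant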
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_sum_op<Scalar,T>,Derived,Constant<T> > operator+(const T& scalar) const;
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_sum_op<T,Scalar>,Constant<T>,Derived> operator+(const T& scalar, const StorageBaseType& expr);
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference)
+#else
+template<typename T>
+const CwiseBinaryOp<internal::scalar_difference_op<Scalar,T>,Derived,Constant<T> > operator-(const T& scalar) const;
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_difference_op<T,Scalar>,Constant<T>,Derived> operator-(const T& scalar, const StorageBaseType& expr);
+#endif
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient)
+#else
+ template<typename T> friend
+ inline const CwiseBinaryOp<internal::scalar_quotient_op<T,Scalar>,Constant<T>,Derived>
+ operator/(const T& s,const StorageBaseType& a);
+#endif
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>
+operator^(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+ EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+ return CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
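+// operator^ above is a logical xor restricted to boolean expressions:
+// the static assertion rejects any non-bool Scalar at compile time.
+// Minimal sketch (assuming Array<bool,Dynamic,1> m1, m2 of equal size):
+//   (m1 ^ m2)  // true exactly where the two masks differ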
+#if 0
+template<typename DerivedN>
+inline const CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>
+polygamma(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedN> &n) const
+{
+ return CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>(n.derived(), this->derived());
+}
+#endif
+template<typename DerivedQ>
+inline const CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>
+zeta(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedQ> &q) const
+{
+ return CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>(this->derived(), q.derived());
+}
+// end # include "../plugins/ArrayCwiseBinaryOps.h"
+# ifdef EIGEN_ARRAYBASE_PLUGIN
+# include EIGEN_ARRAYBASE_PLUGIN
+# endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const ArrayBase& other)
+ {
+ internal::call_assignment(derived(), other.derived());
+ return derived();
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator=(const Scalar &value)
+ { Base::setConstant(value); return derived(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator+=(const Scalar& scalar);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator-=(const Scalar& scalar);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator+=(const ArrayBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator-=(const ArrayBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator*=(const ArrayBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Derived& operator/=(const ArrayBase<OtherDerived>& other);
+ public:
+ EIGEN_DEVICE_FUNC
+ ArrayBase<Derived>& array() { return *this; }
+ EIGEN_DEVICE_FUNC
+ const ArrayBase<Derived>& array() const { return *this; }
+ protected:
+ EIGEN_DEVICE_FUNC
+ ArrayBase() : Base() {}
+ private:
+ explicit ArrayBase(Index);
+ ArrayBase(Index,Index);
+ template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
+ protected:
+ template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+ template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
+ {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+};
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
+{
+ call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
+{
+ call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
+{
+ call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
+{
+ call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
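+// The four compound-assignment operators above all funnel through
+// internal::call_assignment with the matching functor (add_assign_op,
+// sub_assign_op, mul_assign_op, div_assign_op), so they share one
+// vectorized assignment path with plain operator=.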
+}
+#endif
+// end #include "src/Core/ArrayBase.h"
+// #include "src/Core/util/BlasUtil.h"
+#ifndef EIGEN_BLASUTIL_H
+#define EIGEN_BLASUTIL_H
+namespace Eigen {
+namespace internal {
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
+struct gebp_kernel;
+template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
+struct gemm_pack_rhs;
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
+struct gemm_pack_lhs;
+template<
+ typename Index,
+ typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+ int ResStorageOrder>
+struct general_matrix_matrix_product;
+template<typename Index,
+ typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
+struct general_matrix_vector_product;
+template<bool Conjugate> struct conj_if;
+template<> struct conj_if<true> {
+ template<typename T>
+ inline T operator()(const T& x) const { return numext::conj(x); }
+ template<typename T>
+ inline T pconj(const T& x) const { return internal::pconj(x); }
+};
+template<> struct conj_if<false> {
+ template<typename T>
+ inline const T& operator()(const T& x) const { return x; }
+ template<typename T>
+ inline const T& pconj(const T& x) const { return x; }
+};
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
+struct conj_helper
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
+ { return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
+};
+template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
+{
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
+};
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
+};
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
+};
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+ { return c + pmul(x,y); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+ { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
+};
+template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
+ { return conj_if<Conj>()(x)*y; }
+};
+template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
+{
+ typedef std::complex<RealScalar> Scalar;
+ EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
+ { return padd(c, pmul(x,y)); }
+ EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
+ { return x*conj_if<Conj>()(y); }
+};
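+// The conj_helper specializations above hard-code the four conjugation
+// patterns for complex operands: e.g. the <false,true> variant expands
+// x * conj(y) into real/imaginary parts inside pmul, and <true,true>
+// expands conj(x) * conj(y). Product kernels can thus fold conjugation
+// into the multiply instead of materializing conjugated copies.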
+template<typename From,typename To> struct get_factor {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
+};
+template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
+};
+template<typename Scalar, typename Index>
+class BlasVectorMapper {
+ public:
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
+ return m_data[i];
+ }
+ template <typename Packet, int AlignmentType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
+ return ploadt<Packet, AlignmentType>(m_data + i);
+ }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC bool aligned(Index i) const {
+ return (UIntPtr(m_data+i)%sizeof(Packet))==0;
+ }
+ protected:
+ Scalar* m_data;
+};
+template<typename Scalar, typename Index, int AlignmentType>
+class BlasLinearMapper {
+ public:
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+ internal::prefetch(&operator()(i));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
+ return m_data[i];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
+ return ploadt<Packet, AlignmentType>(m_data + i);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
+ return ploadt<HalfPacket, AlignmentType>(m_data + i);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
+ pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
+ }
+ protected:
+ Scalar *m_data;
+};
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned>
+class blas_data_mapper {
+ public:
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename packet_traits<Scalar>::half HalfPacket;
+ typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
+ typedef BlasVectorMapper<Scalar, Index> VectorMapper;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
+ getSubMapper(Index i, Index j) const {
+ return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
+ return LinearMapper(&operator()(i, j));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
+ return VectorMapper(&operator()(i, j));
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
+ return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
+ return ploadt<Packet, AlignmentType>(&operator()(i, j));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
+ return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
+ }
+ template<typename SubPacket>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
+ pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
+ }
+ template<typename SubPacket>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
+ return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
+ }
+ EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
+ EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
+ if (UIntPtr(m_data)%sizeof(Scalar)) {
+ return -1;
+ }
+ return internal::first_default_aligned(m_data, size);
+ }
+ protected:
+ Scalar* EIGEN_RESTRICT m_data;
+ const Index m_stride;
+};
+template<typename Scalar, typename Index, int StorageOrder>
+class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
+ public:
+ EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
+ EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
+ return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
+ }
+};
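+// blas_data_mapper presents a (pointer, stride) pair to the packing and
+// product kernels through a uniform 2-D interface: operator()(i, j)
+// resolves to j + i*stride for row-major and i + j*stride for
+// column-major storage, and getSubMapper merely rebases the pointer,
+// which is what keeps the blocked kernels storage-order agnostic.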
+template<typename XprType> struct blas_traits
+{
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef const XprType& ExtractType;
+ typedef XprType _ExtractType;
+ enum {
+ IsComplex = NumTraits<Scalar>::IsComplex,
+ IsTransposed = false,
+ NeedToConjugate = false,
+ HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit)
+ && ( bool(XprType::IsVectorAtCompileTime)
+ || int(inner_stride_at_compile_time<XprType>::ret) == 1)
+ ) ? 1 : 0
+ };
+ typedef typename conditional<bool(HasUsableDirectAccess),
+ ExtractType,
+ typename _ExtractType::PlainObject
+ >::type DirectLinearAccessType;
+ static inline ExtractType extract(const XprType& x) { return x; }
+ static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
+};
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+ enum {
+ IsComplex = NumTraits<Scalar>::IsComplex,
+ NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
+ };
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
+};
+template<typename Scalar, typename NestedXpr, typename Plain>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
+ static inline Scalar extractScalarFactor(const XprType& x)
+ { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
+};
+template<typename Scalar, typename NestedXpr, typename Plain>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
+ typedef typename Base::ExtractType ExtractType;
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
+ static inline Scalar extractScalarFactor(const XprType& x)
+ { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
+};
+template<typename Scalar, typename Plain1, typename Plain2>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
+ const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
+ : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
+{};
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef blas_traits<NestedXpr> Base;
+ typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
+ typedef typename Base::ExtractType ExtractType;
+ static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+ static inline Scalar extractScalarFactor(const XprType& x)
+ { return - Base::extractScalarFactor(x.nestedExpression()); }
+};
+template<typename NestedXpr>
+struct blas_traits<Transpose<NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+ typedef typename NestedXpr::Scalar Scalar;
+ typedef blas_traits<NestedXpr> Base;
+ typedef Transpose<NestedXpr> XprType;
+ typedef Transpose<const typename Base::_ExtractType> ExtractType;
+ typedef Transpose<const typename Base::_ExtractType> _ExtractType;
+ typedef typename conditional<bool(Base::HasUsableDirectAccess),
+ ExtractType,
+ typename ExtractType::PlainObject
+ >::type DirectLinearAccessType;
+ enum {
+ IsTransposed = Base::IsTransposed ? 0 : 1
+ };
+ static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
+ static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
+};
+template<typename T>
+struct blas_traits<const T>
+ : blas_traits<T>
+{};
+template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
+struct extract_data_selector {
+ static const typename T::Scalar* run(const T& m)
+ {
+ return blas_traits<T>::extract(m).data();
+ }
+};
+template<typename T>
+struct extract_data_selector<T,false> {
+ static typename T::Scalar* run(const T&) { return 0; }
+};
+template<typename T> const typename T::Scalar* extract_data(const T& m)
+{
+ return extract_data_selector<T>::run(m);
+}
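+// blas_traits peels transposition, conjugation, negation and constant
+// scalar factors off an expression before it reaches the product
+// kernels. A hedged illustration (assuming MatrixXf m): for the
+// expression 2.0f * m.transpose(), extract() recovers the plain data of
+// m, extractScalarFactor() returns 2.0f, and IsTransposed records the
+// transposition, so the kernel can run directly on m's storage.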
+}
+}
+#endif
+// end #include "src/Core/util/BlasUtil.h"
+// #include "src/Core/DenseStorage.h"
+#ifndef EIGEN_MATRIXSTORAGE_H
+#define EIGEN_MATRIXSTORAGE_H
+#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
+#else
+ #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
+#endif
+namespace Eigen {
+namespace internal {
+ template<typename T>
+ T generic_fast_tanh_float(const T& a_x)
+ {
+ // Clamp the inputs to the range [-9, 9] since anything outside
+ // this range is +/-1.0f in single-precision.
+ const T plus_9 = pset1<T>(9.f);
+ const T minus_9 = pset1<T>(-9.f);
+ const T x = pmax(pmin(a_x, plus_9), minus_9);
+ // The monomial coefficients of the numerator polynomial (odd).
+ const T alpha_1 = pset1<T>(4.89352455891786e-03f);
+ const T alpha_3 = pset1<T>(6.37261928875436e-04f);
+ const T alpha_5 = pset1<T>(1.48572235717979e-05f);
+ const T alpha_7 = pset1<T>(5.12229709037114e-08f);
+ const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
+ const T alpha_11 = pset1<T>(2.00018790482477e-13f);
+ const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ const T beta_0 = pset1<T>(4.89352518554385e-03f);
+ const T beta_2 = pset1<T>(2.26843463243900e-03f);
+ const T beta_4 = pset1<T>(1.18534705686654e-04f);
+ const T beta_6 = pset1<T>(1.19825839466702e-06f);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const T x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p.
+ T p = pmadd(x2, alpha_13, alpha_11);
+ p = pmadd(x2, p, alpha_9);
+ p = pmadd(x2, p, alpha_7);
+ p = pmadd(x2, p, alpha_5);
+ p = pmadd(x2, p, alpha_3);
+ p = pmadd(x2, p, alpha_1);
+ p = pmul(x, p);
+
+  // Evaluate the denominator polynomial q.
+ T q = pmadd(x2, beta_6, beta_4);
+ q = pmadd(x2, q, beta_2);
+ q = pmadd(x2, q, beta_0);
+
+ // Divide the numerator by the denominator.
+ return pdiv(p, q);
+ }
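+// generic_fast_tanh_float implements the classic rational approximation
+// tanh(x) ~= p(x)/q(x), with an odd degree-13 numerator and an even
+// degree-6 denominator, both evaluated in x^2 via Horner's scheme so the
+// identical code serves scalars and SIMD packets. An illustrative scalar
+// transliteration of the recurrence above:
+//   const float x2 = x * x;
+//   float p = alpha_13;
+//   p = x2 * p + alpha_11;  /* ... repeat down to alpha_1 ... */
+//   p *= x;
+//   float q = beta_6;
+//   q = x2 * q + beta_4;  q = x2 * q + beta_2;  q = x2 * q + beta_0;
+//   return p / q;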
+
+struct constructor_without_unaligned_array_assert {};
+template<typename T, int Size>
+EIGEN_DEVICE_FUNC
+void check_static_allocation_size()
+{
+ #if EIGEN_STACK_ALLOCATION_LIMIT
+ EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+ #endif
+}
+template <typename T, int Size, int MatrixOrArrayOptions,
+ int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
+ : compute_default_alignment<T,Size>::value >
+struct plain_array
+{
+ T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
+#elif EIGEN_GNUC_AT_LEAST(4,7)
+ template<typename PtrType>
+ EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+ eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
+ && "this assertion is explained here: " \
+ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
+ " **** READ THIS WEB PAGE !!! ****");
+#else
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+ eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
+ && "this assertion is explained here: " \
+ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
+ " **** READ THIS WEB PAGE !!! ****");
+#endif
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 8>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 16>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 32>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 64>
+{
+ EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
+ EIGEN_DEVICE_FUNC
+ plain_array()
+ {
+ EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
+ check_static_allocation_size<T,Size>();
+ }
+ EIGEN_DEVICE_FUNC
+ plain_array(constructor_without_unaligned_array_assert)
+ {
+ check_static_allocation_size<T,Size>();
+ }
+};
+template <typename T, int MatrixOrArrayOptions, int Alignment>
+struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
+{
+ T array[1];
+ EIGEN_DEVICE_FUNC plain_array() {}
+ EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
+};
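+// plain_array is the raw fixed-size buffer behind fixed-size matrices
+// and arrays: the Alignment parameter selects one of the specializations
+// above (8/16/32/64-byte EIGEN_ALIGN_TO_BOUNDARY), whose default
+// constructors run the unaligned-array assert to catch objects that were
+// heap-allocated without an over-aligned operator new; the Size==0
+// specialization keeps a one-element array so the type stays valid.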
+}
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
+ }
+ EIGEN_DEVICE_FUNC
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()) {}
+ EIGEN_DEVICE_FUNC
+ DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
+ }
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other) m_data = other.m_data;
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
+ EIGEN_UNUSED_VARIABLE(size);
+ EIGEN_UNUSED_VARIABLE(rows);
+ EIGEN_UNUSED_VARIABLE(cols);
+ }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+ EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
+{
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
+ EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
+ EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+ EIGEN_DEVICE_FUNC const T *data() const { return 0; }
+ EIGEN_DEVICE_FUNC T *data() { return 0; }
+};
+template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ Index m_rows;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_rows = other.m_rows;
+ m_cols = other.m_cols;
+ }
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+ { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
+ EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
+ EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ Index m_rows;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_rows = other.m_rows;
+ }
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+ EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
+ EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
+{
+ internal::plain_array<T,Size,_Options> m_data;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ m_data = other.m_data;
+ m_cols = other.m_cols;
+ }
+ return *this;
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
+ EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+ void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
+ void resize(Index, Index, Index cols) { m_cols = cols; }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
+{
+ T *m_data;
+ Index m_rows;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+ : m_data(0), m_rows(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
+ , m_rows(other.m_rows)
+ , m_cols(other.m_cols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)
+ internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+ : m_data(std::move(other.m_data))
+ , m_rows(std::move(other.m_rows))
+ , m_cols(std::move(other.m_cols))
+ {
+ other.m_data = nullptr;
+ other.m_rows = 0;
+ other.m_cols = 0;
+ }
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_rows, other.m_rows);
+ swap(m_cols, other.m_cols);
+ return *this;
+ }
+#endif
+ EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+ { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+ void conservativeResize(Index size, Index rows, Index cols)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
+ m_rows = rows;
+ m_cols = cols;
+ }
+ EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
+ {
+ if(size != m_rows*m_cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ }
+ m_rows = rows;
+ m_cols = cols;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
+{
+ T *m_data;
+ Index m_cols;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
+ EIGEN_UNUSED_VARIABLE(rows);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
+ , m_cols(other.m_cols)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
+ internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+ : m_data(std::move(other.m_data))
+ , m_cols(std::move(other.m_cols))
+ {
+ other.m_data = nullptr;
+ other.m_cols = 0;
+ }
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_cols, other.m_cols);
+ return *this;
+ }
+#endif
+ EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+ EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+ EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+ EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
+ m_cols = cols;
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
+ {
+ if(size != _Rows*m_cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ }
+ m_cols = cols;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
+{
+ T *m_data;
+ Index m_rows;
+ public:
+ EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
+ explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+ EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
+ EIGEN_UNUSED_VARIABLE(cols);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+ : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
+ , m_rows(other.m_rows)
+ {
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
+ internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
+ }
+ EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+ {
+ if (this != &other)
+ {
+ DenseStorage tmp(other);
+ this->swap(tmp);
+ }
+ return *this;
+ }
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+ : m_data(std::move(other.m_data))
+ , m_rows(std::move(other.m_rows))
+ {
+ other.m_data = nullptr;
+ other.m_rows = 0;
+ }
+ EIGEN_DEVICE_FUNC
+ DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+ {
+ using std::swap;
+ swap(m_data, other.m_data);
+ swap(m_rows, other.m_rows);
+ return *this;
+ }
+#endif
+ EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
+ EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+ EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+ EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+ void conservativeResize(Index size, Index rows, Index)
+ {
+ m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
+ m_rows = rows;
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
+ {
+ if(size != m_rows*_Cols)
+ {
+ internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
+ if (size)
+ m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+ else
+ m_data = 0;
+ EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+ }
+ m_rows = rows;
+ }
+ EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+ EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
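+// Taken together, the DenseStorage specializations map compile-time
+// shape information onto the cheapest viable representation; a hedged
+// illustration:
+//   Matrix4f                -> fixed plain_array, no runtime size members
+//   Matrix<float,3,Dynamic> -> heap buffer plus an m_cols member
+//   MatrixXf                -> heap buffer plus m_rows and m_cols
+// so a fixed-size matrix is essentially just its coefficient array
+// (plus any alignment padding).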
+}
+#endif
+// end #include "src/Core/DenseStorage.h"
+// #include "src/Core/NoAlias.h"
+#ifndef EIGEN_NOALIAS_H
+#define EIGEN_NOALIAS_H
+namespace Eigen {
+template<typename ExpressionType, template <typename> class StorageBase>
+class NoAlias
+{
+ public:
+ typedef typename ExpressionType::Scalar Scalar;
+ explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
+ {
+ call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
+ return m_expression;
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
+ {
+ call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return m_expression;
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
+ {
+ call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return m_expression;
+ }
+ EIGEN_DEVICE_FUNC
+ ExpressionType& expression() const
+ {
+ return m_expression;
+ }
+ protected:
+ ExpressionType& m_expression;
+};
+template<typename Derived>
+NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
+{
+ return NoAlias<Derived, Eigen::MatrixBase >(derived());
+}
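+// noalias() is the explicit opt-out from Eigen's aliasing protection:
+// the NoAlias assignment operators dispatch to call_assignment_no_alias,
+// skipping the temporary a plain assignment would introduce for
+// products. Hedged usage sketch (MatrixXf a, b, c of matching sizes):
+//   c.noalias() = a * b;  // writes the product directly into c
+//   c = a * b;            // may first evaluate into a temporary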
+}
+#endif
+// end #include "src/Core/NoAlias.h"
+// #include "src/Core/PlainObjectBase.h"
+#ifndef EIGEN_DENSESTORAGEBASE_H
+#define EIGEN_DENSESTORAGEBASE_H
+#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
+#else
+# undef EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+#endif
+namespace Eigen {
+namespace internal {
+template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
+ template<typename Index>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_ALWAYS_INLINE void run(Index, Index)
+ {
+ }
+};
+template<> struct check_rows_cols_for_overflow<Dynamic> {
+ template<typename Index>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
+ {
+ Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1;
+ bool error = (rows == 0 || cols == 0) ? false
+ : (rows > max_index / cols);
+ if (error)
+ throw_std_bad_alloc();
+ }
+};
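+// The Dynamic specialization above guards rows*cols against overflowing
+// the signed Index type: max_index is 2^(8*sizeof(Index)-1) - 1, and the
+// test divides instead of multiplying so the check itself cannot
+// overflow before throw_std_bad_alloc() is reached.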
+template <typename Derived,
+ typename OtherDerived = Derived,
+ bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
+struct conservative_resize_like_impl;
+template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
+}
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+namespace doxygen {
+template<typename Derived> struct dense_xpr_base_dispatcher;
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+ : public MatrixBase {};
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+ : public ArrayBase {};
+}
+template<typename Derived>
+class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
+#else
+template<typename Derived>
+class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
+#endif
+{
+ public:
+ enum { Options = internal::traits<Derived>::Options };
+ typedef typename internal::dense_xpr_base<Derived>::type Base;
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef Derived DenseType;
+ using Base::RowsAtCompileTime;
+ using Base::ColsAtCompileTime;
+ using Base::SizeAtCompileTime;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
+ friend class Eigen::Map<Derived, Unaligned>;
+ typedef Eigen::Map<Derived, Unaligned> MapType;
+ friend class Eigen::Map<const Derived, Unaligned>;
+ typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
+#if EIGEN_MAX_ALIGN_BYTES>0
+ friend class Eigen::Map<Derived, AlignedMax>;
+ friend class Eigen::Map<const Derived, AlignedMax>;
+#endif
+ typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
+ typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
+ template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
+ template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
+ template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
+ template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
+ protected:
+ DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
+ public:
+ enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+ EIGEN_DEVICE_FUNC
+ Base& base() { return *static_cast<Base*>(this); }
+ EIGEN_DEVICE_FUNC
+ const Base& base() const { return *static_cast<const Base*>(this); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
+ {
+ if(Flags & RowMajorBit)
+ return m_storage.data()[colId + rowId * m_storage.cols()];
+ else
+ return m_storage.data()[rowId + colId * m_storage.rows()];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
+ {
+ return m_storage.data()[index];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
+ {
+ if(Flags & RowMajorBit)
+ return m_storage.data()[colId + rowId * m_storage.cols()];
+ else
+ return m_storage.data()[rowId + colId * m_storage.rows()];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
+ {
+ return m_storage.data()[index];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ if(Flags & RowMajorBit)
+ return m_storage.data()[colId + rowId * m_storage.cols()];
+ else
+ return m_storage.data()[rowId + colId * m_storage.rows()];
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
+ {
+ return m_storage.data()[index];
+ }
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+ {
+ return internal::ploadt<PacketScalar, LoadMode>
+ (m_storage.data() + (Flags & RowMajorBit
+ ? colId + rowId * m_storage.cols()
+ : rowId + colId * m_storage.rows()));
+ }
+ template<int LoadMode>
+ EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+ {
+ return internal::ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
+ }
+ template<int StoreMode>
+ EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val)
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>
+ (m_storage.data() + (Flags & RowMajorBit
+ ? colId + rowId * m_storage.cols()
+ : rowId + colId * m_storage.rows()), val);
+ }
+ template<int StoreMode>
+ EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val)
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, val);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const
+ { return m_storage.data(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()
+ { return m_storage.data(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
+ {
+ eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
+ && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
+ && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
+ && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
+ && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
+ internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ Index size = rows*cols;
+ bool size_changed = size != this->size();
+ m_storage.resize(size, rows, cols);
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ #else
+ m_storage.resize(rows*cols, rows, cols);
+ #endif
+ }
+ EIGEN_DEVICE_FUNC
+ inline void resize(Index size)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
+ eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ bool size_changed = size != this->size();
+ #endif
+ if(RowsAtCompileTime == 1)
+ m_storage.resize(size, 1, size);
+ else
+ m_storage.resize(size, size, 1);
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ #endif
+ }
+ EIGEN_DEVICE_FUNC
+ inline void resize(NoChange_t, Index cols)
+ {
+ resize(rows(), cols);
+ }
+ EIGEN_DEVICE_FUNC
+ inline void resize(Index rows, NoChange_t)
+ {
+ resize(rows, cols());
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
+ {
+ const OtherDerived& other = _other.derived();
+ internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());
+ const Index othersize = other.rows()*other.cols();
+ if(RowsAtCompileTime == 1)
+ {
+ eigen_assert(other.rows() == 1 || other.cols() == 1);
+ resize(1, othersize);
+ }
+ else if(ColsAtCompileTime == 1)
+ {
+ eigen_assert(other.rows() == 1 || other.cols() == 1);
+ resize(othersize, 1);
+ }
+ else resize(other.rows(), other.cols());
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
+ {
+ internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
+ {
+ conservativeResize(rows, cols());
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
+ {
+ conservativeResize(rows(), cols);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResize(Index size)
+ {
+ internal::conservative_resize_like_impl<Derived>::run(*this, size);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
+ {
+ internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
+ {
+ return _set(other);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
+ {
+ _resize_to_match(other);
+ return Base::lazyAssign(other.derived());
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
+ {
+ resize(func.rows(), func.cols());
+ return Base::operator=(func);
+ }
+ protected:
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
+ {
+ }
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_DEVICE_FUNC
+ explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+ : m_storage(internal::constructor_without_unaligned_array_assert())
+ {
+ }
+#endif
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
+ : m_storage( std::move(other.m_storage) )
+ {
+ }
+ EIGEN_DEVICE_FUNC
+ PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
+ {
+ using std::swap;
+ swap(m_storage, other.m_storage);
+ return *this;
+ }
+#endif
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
+ : Base(), m_storage(other.m_storage) { }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
+ : m_storage(size, rows, cols)
+ {
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
+ : m_storage()
+ {
+ _check_template_params();
+ resizeLike(other);
+ _set_noalias(other);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
+ : m_storage()
+ {
+ _check_template_params();
+ resizeLike(other);
+ *this = other.derived();
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
+ {
+ _check_template_params();
+ resize(other.rows(), other.cols());
+ other.evalTo(this->derived());
+ }
+ public:
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
+ {
+ _resize_to_match(other);
+ Base::operator=(other.derived());
+ return this->derived();
+ }
+ static inline ConstMapType Map(const Scalar* data)
+ { return ConstMapType(data); }
+ static inline MapType Map(Scalar* data)
+ { return MapType(data); }
+ static inline ConstMapType Map(const Scalar* data, Index size)
+ { return ConstMapType(data, size); }
+ static inline MapType Map(Scalar* data, Index size)
+ { return MapType(data, size); }
+ static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
+ { return ConstMapType(data, rows, cols); }
+ static inline MapType Map(Scalar* data, Index rows, Index cols)
+ { return MapType(data, rows, cols); }
+ static inline ConstAlignedMapType MapAligned(const Scalar* data)
+ { return ConstAlignedMapType(data); }
+ static inline AlignedMapType MapAligned(Scalar* data)
+ { return AlignedMapType(data); }
+ static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
+ { return ConstAlignedMapType(data, size); }
+ static inline AlignedMapType MapAligned(Scalar* data, Index size)
+ { return AlignedMapType(data, size); }
+ static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
+ { return ConstAlignedMapType(data, rows, cols); }
+ static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
+ { return AlignedMapType(data, rows, cols); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
+ { return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+ { return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+ { return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
+ { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+ { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+ { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+ template<int Outer, int Inner>
+ static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+ { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
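+  // Usage sketch (editorial illustration, not part of the upstream Eigen
+  // sources): the static Map()/MapAligned() overloads above view existing,
+  // caller-owned memory (`buf` below) through the full dense API, no copy:
+  //   float buf[6] = {1, 2, 3, 4, 5, 6};
+  //   Eigen::MatrixXf m = Eigen::MatrixXf::Map(buf, 2, 3); // deep copy of the view
+  //   Eigen::MatrixXf::Map(buf, 2, 3).setZero();           // writes through to buf
+  // MapAligned() additionally assumes (and asserts) that the pointer meets
+  // the packet alignment requirement.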
+ using Base::setConstant;
+ EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
+ EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
+ using Base::setZero;
+ EIGEN_DEVICE_FUNC Derived& setZero(Index size);
+ EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
+ using Base::setOnes;
+ EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
+ EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
+ using Base::setRandom;
+ Derived& setRandom(Index size);
+ Derived& setRandom(Index rows, Index cols);
+ #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
+ #include EIGEN_PLAINOBJECTBASE_PLUGIN
+ #endif
+ protected:
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
+ {
+ #ifdef EIGEN_NO_AUTOMATIC_RESIZING
+ eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
+ : (rows() == other.rows() && cols() == other.cols())))
+ && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+ EIGEN_ONLY_USED_FOR_DEBUG(other);
+ #else
+ resizeLike(other);
+ #endif
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
+ {
+ internal::call_assignment(this->derived(), other.derived());
+ return this->derived();
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
+ {
+ internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
+ return this->derived();
+ }
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
+ bool(NumTraits<T1>::IsInteger),
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+ resize(rows,cols);
+ }
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
+ m_storage.data()[0] = Scalar(val0);
+ m_storage.data()[1] = Scalar(val1);
+ }
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
+ typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
+ && (internal::is_same<T0,Index>::value)
+ && (internal::is_same<T1,Index>::value)
+ && Base::SizeAtCompileTime==2,T1>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
+ m_storage.data()[0] = Scalar(val0);
+ m_storage.data()[1] = Scalar(val1);
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
+ && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
+ {
+ const bool is_integer = NumTraits<T>::IsInteger;
+ EIGEN_UNUSED_VARIABLE(is_integer);
+ EIGEN_STATIC_ASSERT(is_integer,
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+ resize(size);
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
+ m_storage.data()[0] = val0;
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Index& val0,
+ typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
+ && (internal::is_same<Index,T>::value)
+ && Base::SizeAtCompileTime==1
+ && internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
+ m_storage.data()[0] = Scalar(val0);
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Scalar* data){
+ this->_set_noalias(ConstMapType(data));
+ }
+ template<typename T, typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
+ this->_set_noalias(other);
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Derived& other){
+ this->_set_noalias(other);
+ }
+ template<typename T, typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
+ this->derived() = other;
+ }
+ template<typename T, typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
+ {
+ resize(other.rows(), other.cols());
+ other.evalTo(this->derived());
+ }
+ template<typename T, typename OtherDerived, int ColsAtCompileTime>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
+ {
+ this->derived() = r;
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
+ typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic
+ && Base::SizeAtCompileTime!=1
+ && internal::is_convertible<T, Scalar>::value
+ && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
+ {
+ Base::setConstant(val0);
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE void _init1(const Index& val0,
+ typename internal::enable_if< (!internal::is_same<Index,Scalar>::value)
+ && (internal::is_same<Index,T>::value)
+ && Base::SizeAtCompileTime!=Dynamic
+ && Base::SizeAtCompileTime!=1
+ && internal::is_convertible<T, Scalar>::value
+ && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
+ {
+ Base::setConstant(val0);
+ }
+ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
+ friend struct internal::matrix_swap_impl;
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(DenseBase<OtherDerived> & other)
+ {
+ enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
+ internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived());
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ void swap(DenseBase<OtherDerived> const & other)
+ { Base::swap(other.derived()); }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE void _check_template_params()
+ {
+ EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
+ && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
+ && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
+ && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
+ && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
+ && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0))
+ && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic)
+ && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic)
+ && (Options & (DontAlign|RowMajor)) == Options),
+ INVALID_MATRIX_TEMPLATE_PARAMETERS)
+ }
+ enum { IsPlainObjectBase = 1 };
+#endif
+};
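+// Editorial note (illustrative): _check_template_params() above rejects
+// inconsistent template arguments at compile time. For instance, a fixed-size
+// row vector declared column-major, Matrix<float,1,4,ColMajor>, trips
+// INVALID_MATRIX_TEMPLATE_PARAMETERS, whereas Matrix<float,1,4,RowMajor>
+// (the default storage order for 1xN) is accepted.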
+namespace internal {
+template <typename Derived, typename OtherDerived, bool IsVector>
+struct conservative_resize_like_impl
+{
+ static void run(DenseBase<Derived>& _this, Index rows, Index cols)
+ {
+ if (_this.rows() == rows && _this.cols() == cols) return;
+ EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
+ if ( ( Derived::IsRowMajor && _this.cols() == cols) ||
+ (!Derived::IsRowMajor && _this.rows() == rows) )
+ {
+ internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols);
+ _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
+ }
+ else
+ {
+ typename Derived::PlainObject tmp(rows,cols);
+ const Index common_rows = numext::mini(rows, _this.rows());
+ const Index common_cols = numext::mini(cols, _this.cols());
+ tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+ _this.derived().swap(tmp);
+ }
+ }
+ static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
+ {
+ if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
+ EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
+ EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
+ if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) ||
+ (!Derived::IsRowMajor && _this.rows() == other.rows()) )
+ {
+ const Index new_rows = other.rows() - _this.rows();
+ const Index new_cols = other.cols() - _this.cols();
+ _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols());
+ if (new_rows>0)
+ _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows);
+ else if (new_cols>0)
+ _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols);
+ }
+ else
+ {
+ typename Derived::PlainObject tmp(other);
+ const Index common_rows = numext::mini(tmp.rows(), _this.rows());
+ const Index common_cols = numext::mini(tmp.cols(), _this.cols());
+ tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+ _this.derived().swap(tmp);
+ }
+ }
+};
+template <typename Derived, typename OtherDerived>
+struct conservative_resize_like_impl<Derived,OtherDerived,true>
+ : conservative_resize_like_impl<Derived,OtherDerived,false>
+{
+ using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
+ static void run(DenseBase<Derived>& _this, Index size)
+ {
+ const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
+ const Index new_cols = Derived::RowsAtCompileTime==1 ? size : 1;
+ _this.derived().m_storage.conservativeResize(size,new_rows,new_cols);
+ }
+ static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
+ {
+ if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
+ const Index num_new_elements = other.size() - _this.size();
+ const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows();
+ const Index new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1;
+ _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols);
+ if (num_new_elements > 0)
+ _this.tail(num_new_elements) = other.tail(num_new_elements);
+ }
+};
+template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
+struct matrix_swap_impl
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(MatrixTypeA& a, MatrixTypeB& b)
+ {
+ a.base().swap(b);
+ }
+};
+template<typename MatrixTypeA, typename MatrixTypeB>
+struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
+{
+ EIGEN_DEVICE_FUNC
+ static inline void run(MatrixTypeA& a, MatrixTypeB& b)
+ {
+ static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
+ }
+};
+}
+}
+#endif
+// end #include "src/Core/PlainObjectBase.h"
+// #include "src/Core/Matrix.h"
+#ifndef EIGEN_MATRIX_H
+#define EIGEN_MATRIX_H
+namespace Eigen {
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+private:
+ enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
+ typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
+ enum {
+ row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
+ is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
+ max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
+ default_alignment = compute_default_alignment<_Scalar,max_size>::value,
+ actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
+ required_alignment = unpacket_traits<PacketScalar>::alignment,
+ packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
+ };
+public:
+ typedef _Scalar Scalar;
+ typedef Dense StorageKind;
+ typedef Eigen::Index StorageIndex;
+ typedef MatrixXpr XprKind;
+ enum {
+ RowsAtCompileTime = _Rows,
+ ColsAtCompileTime = _Cols,
+ MaxRowsAtCompileTime = _MaxRows,
+ MaxColsAtCompileTime = _MaxCols,
+ Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
+ Options = _Options,
+ InnerStrideAtCompileTime = 1,
+ OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
+ EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
+ Alignment = actual_alignment
+ };
+};
+}
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+class Matrix
+ : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ public:
+ typedef PlainObjectBase<Matrix> Base;
+ enum { Options = _Options };
+ EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
+ typedef typename Base::PlainObject PlainObject;
+ using Base::base;
+ using Base::coeffRef;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
+ {
+ return Base::_set(other);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
+ {
+ return Base::_set(other);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
+ {
+ return Base::operator=(other);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
+ {
+ return Base::operator=(func);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix() : Base()
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+ EIGEN_DEVICE_FUNC
+ explicit Matrix(internal::constructor_without_unaligned_array_assert)
+ : Base(internal::constructor_without_unaligned_array_assert())
+ { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
+ : Base(std::move(other))
+ {
+ Base::_check_template_params();
+ if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
+ Base::_set_noalias(other);
+ }
+ EIGEN_DEVICE_FUNC
+ Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
+ {
+ other.swap(*this);
+ return *this;
+ }
+#endif
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Matrix(const T& x)
+ {
+ Base::_check_template_params();
+ Base::template _init1<T>(x);
+ }
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
+ {
+ Base::_check_template_params();
+ Base::template _init2<T0,T1>(x, y);
+ }
+ #else
+ EIGEN_DEVICE_FUNC
+ explicit Matrix(const Scalar *data);
+ EIGEN_STRONG_INLINE explicit Matrix(Index dim);
+ Matrix(const Scalar& x);
+ EIGEN_DEVICE_FUNC
+ Matrix(Index rows, Index cols);
+ Matrix(const Scalar& x, const Scalar& y);
+ #endif
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
+ m_storage.data()[0] = x;
+ m_storage.data()[1] = y;
+ m_storage.data()[2] = z;
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
+ m_storage.data()[0] = x;
+ m_storage.data()[1] = y;
+ m_storage.data()[2] = z;
+ m_storage.data()[3] = w;
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
+ { }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
+ : Base(other.derived())
+ { }
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+ #ifdef EIGEN_MATRIX_PLUGIN
+ #include EIGEN_MATRIX_PLUGIN
+ #endif
+ protected:
+ template <typename Derived, typename OtherDerived, bool IsVector>
+ friend struct internal::conservative_resize_like_impl;
+ using Base::m_storage;
+};
+#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
+typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix; \
+typedef Matrix<Type, Size, 1> Vector##SizeSuffix##TypeSuffix; \
+typedef Matrix<Type, 1, Size> RowVector##SizeSuffix##TypeSuffix;
+#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
+typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix; \
+typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
+#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
+#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_TYPEDEFS
+#undef EIGEN_MAKE_FIXED_TYPEDEFS
+}
+#endif
+// end #include "src/Core/Matrix.h"
+// #include "src/Core/Array.h"
+#ifndef EIGEN_ARRAY_H
+#define EIGEN_ARRAY_H
+namespace Eigen {
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ typedef ArrayXpr XprKind;
+ typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
+};
+}
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+class Array
+ : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+ public:
+ typedef PlainObjectBase<Array> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Array)
+ enum { Options = _Options };
+ typedef typename Base::PlainObject PlainObject;
+ protected:
+ template <typename Derived, typename OtherDerived, bool IsVector>
+ friend struct internal::conservative_resize_like_impl;
+ using Base::m_storage;
+ public:
+ using Base::base;
+ using Base::coeff;
+ using Base::coeffRef;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
+ {
+ return Base::operator=(other);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
+ {
+ Base::setConstant(value);
+ return *this;
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
+ {
+ return Base::_set(other);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array& operator=(const Array& other)
+ {
+ return Base::_set(other);
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array() : Base()
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_DEVICE_FUNC
+ Array(internal::constructor_without_unaligned_array_assert)
+ : Base(internal::constructor_without_unaligned_array_assert())
+ {
+ Base::_check_template_params();
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+ }
+#endif
+#if EIGEN_HAS_RVALUE_REFERENCES
+ EIGEN_DEVICE_FUNC
+ Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
+ : Base(std::move(other))
+ {
+ Base::_check_template_params();
+ if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
+ Base::_set_noalias(other);
+ }
+ EIGEN_DEVICE_FUNC
+ Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
+ {
+ other.swap(*this);
+ return *this;
+ }
+#endif
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Array(const T& x)
+ {
+ Base::_check_template_params();
+ Base::template _init1<T>(x);
+ }
+ template<typename T0, typename T1>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
+ {
+ Base::_check_template_params();
+ this->template _init2<T0,T1>(val0, val1);
+ }
+ #else
+ EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE explicit Array(Index dim);
+ Array(const Scalar& value);
+ Array(Index rows, Index cols);
+ Array(const Scalar& val0, const Scalar& val1);
+ #endif
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)
+ m_storage.data()[0] = val0;
+ m_storage.data()[1] = val1;
+ m_storage.data()[2] = val2;
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
+ {
+ Base::_check_template_params();
+ EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)
+ m_storage.data()[0] = val0;
+ m_storage.data()[1] = val1;
+ m_storage.data()[2] = val2;
+ m_storage.data()[3] = val3;
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const Array& other)
+ : Base(other)
+ { }
+ private:
+ struct PrivateType {};
+ public:
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
+ typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
+ PrivateType>::type = PrivateType())
+ : Base(other.derived())
+ { }
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+ #ifdef EIGEN_ARRAY_PLUGIN
+ #include EIGEN_ARRAY_PLUGIN
+ #endif
+ private:
+ template<typename MatrixType, typename OtherDerived, bool SwapPointers>
+ friend struct internal::matrix_swap_impl;
+};
+#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
+typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \
+typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix;
+#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
+typedef Array<Type, Size, Dynamic> Array##Size##X##TypeSuffix; \
+typedef Array<Type, Dynamic, Size> Array##X##Size##TypeSuffix;
+#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE
+#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
+using Eigen::Matrix##SizeSuffix##TypeSuffix; \
+using Eigen::Vector##SizeSuffix##TypeSuffix; \
+using Eigen::RowVector##SizeSuffix##TypeSuffix;
+#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X)
+#define EIGEN_USING_ARRAY_TYPEDEFS \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd)
+}
+#endif
+// end #include "src/Core/Array.h"
+// #include "src/Core/CwiseBinaryOp.h"
+#ifndef EIGEN_CWISE_BINARY_OP_H
+#define EIGEN_CWISE_BINARY_OP_H
+namespace Eigen {
+namespace internal {
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+ typedef typename remove_all<Lhs>::type Ancestor;
+ typedef typename traits<Ancestor>::XprKind XprKind;
+ enum {
+ RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
+ ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
+ };
+ typedef typename result_of<
+ BinaryOp(
+ const typename Lhs::Scalar&,
+ const typename Rhs::Scalar&
+ )
+ >::type Scalar;
+ typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
+ typename traits<Rhs>::StorageKind,
+ BinaryOp>::ret StorageKind;
+ typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,
+ typename traits<Rhs>::StorageIndex>::type StorageIndex;
+ typedef typename Lhs::Nested LhsNested;
+ typedef typename Rhs::Nested RhsNested;
+ typedef typename remove_reference<LhsNested>::type _LhsNested;
+ typedef typename remove_reference<RhsNested>::type _RhsNested;
+ enum {
+ Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
+ };
+};
+}
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl;
+template<typename BinaryOp, typename LhsType, typename RhsType>
+class CwiseBinaryOp :
+ public CwiseBinaryOpImpl<
+ BinaryOp, LhsType, RhsType,
+ typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+ typename internal::traits<RhsType>::StorageKind,
+ BinaryOp>::ret>,
+ internal::no_assignment_operator
+{
+ public:
+ typedef typename internal::remove_all<BinaryOp>::type Functor;
+ typedef typename internal::remove_all<LhsType>::type Lhs;
+ typedef typename internal::remove_all<RhsType>::type Rhs;
+ typedef typename CwiseBinaryOpImpl<
+ BinaryOp, LhsType, RhsType,
+ typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+ typename internal::traits<Rhs>::StorageKind,
+ BinaryOp>::ret>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
+ typedef typename internal::ref_selector<LhsType>::type LhsNested;
+ typedef typename internal::ref_selector<RhsType>::type RhsNested;
+ typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
+ typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
+ : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
+ {
+ EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
+ eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const {
+ if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
+ return m_rhs.rows();
+ else
+ return m_lhs.rows();
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const {
+ if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
+ return m_rhs.cols();
+ else
+ return m_lhs.cols();
+ }
+ EIGEN_DEVICE_FUNC
+ const _LhsNested& lhs() const { return m_lhs; }
+ EIGEN_DEVICE_FUNC
+ const _RhsNested& rhs() const { return m_rhs; }
+ EIGEN_DEVICE_FUNC
+ const BinaryOp& functor() const { return m_functor; }
+ protected:
+ LhsNested m_lhs;
+ RhsNested m_rhs;
+ const BinaryOp m_functor;
+};
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl
+ : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+{
+public:
+ typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
+};
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
+{
+ call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
+{
+ call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+ return derived();
+}
+}
+#endif
+// end #include "src/Core/CwiseBinaryOp.h"
+// #include "src/Core/CwiseUnaryOp.h"
+#ifndef EIGEN_CWISE_UNARY_OP_H
+#define EIGEN_CWISE_UNARY_OP_H
+namespace Eigen {
+namespace internal {
+template<typename UnaryOp, typename XprType>
+struct traits<CwiseUnaryOp<UnaryOp, XprType> >
+ : traits<XprType>
+{
+ typedef typename result_of<
+ UnaryOp(const typename XprType::Scalar&)
+ >::type Scalar;
+ typedef typename XprType::Nested XprTypeNested;
+ typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+ enum {
+ Flags = _XprTypeNested::Flags & RowMajorBit
+ };
+};
+}
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl;
+template<typename UnaryOp, typename XprType>
+class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
+{
+ public:
+ typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+ typedef typename internal::ref_selector<XprType>::type XprTypeNested;
+ typedef typename internal::remove_all<XprType>::type NestedExpression;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+ : m_xpr(xpr), m_functor(func) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Index rows() const { return m_xpr.rows(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Index cols() const { return m_xpr.cols(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const UnaryOp& functor() const { return m_functor; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const typename internal::remove_all<XprTypeNested>::type&
+ nestedExpression() const { return m_xpr; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename internal::remove_all<XprTypeNested>::type&
+ nestedExpression() { return m_xpr; }
+ protected:
+ XprTypeNested m_xpr;
+ const UnaryOp m_functor;
+};
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl
+ : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
+{
+public:
+ typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
+};
+}
+#endif
+// end #include "src/Core/CwiseUnaryOp.h"
+// #include "src/Core/CwiseNullaryOp.h"
+#ifndef EIGEN_CWISE_NULLARY_OP_H
+#define EIGEN_CWISE_NULLARY_OP_H
+namespace Eigen {
+namespace internal {
+template<typename NullaryOp, typename PlainObjectType>
+struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
+{
+ enum {
+ Flags = traits<PlainObjectType>::Flags & RowMajorBit
+ };
+};
+}
+template<typename NullaryOp, typename PlainObjectType>
+class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
+{
+ public:
+ typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
+ EIGEN_DEVICE_FUNC
+ CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
+ : m_rows(rows), m_cols(cols), m_functor(func)
+ {
+ eigen_assert(rows >= 0
+ && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+ && cols >= 0
+ && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
+ EIGEN_DEVICE_FUNC
+ const NullaryOp& functor() const { return m_functor; }
+ protected:
+ const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+ const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+ const NullaryOp m_functor;
+};
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
+{
+ return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
+}
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
+ else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
+}
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
+{
+ return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
+}
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
+{
+ return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(Index size, const Scalar& value)
+{
+ return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(const Scalar& value)
+{
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
+(const Scalar& val, const RealScalar& prec) const
+{
+ typename internal::nested_eval<Derived,1>::type self(derived());
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < rows(); ++i)
+ if(!internal::isApprox(self.coeff(i, j), val, prec))
+ return false;
+ return true;
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
+(const Scalar& val, const RealScalar& prec) const
+{
+ return isApproxToConstant(val, prec);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
+{
+ setConstant(val);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
+{
+ return derived() = Constant(rows(), cols(), val);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
+{
+ resize(size);
+ return setConstant(val);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
+{
+ resize(rows, cols);
+ return setConstant(val);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return setLinSpaced(size(), low, high);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero(Index rows, Index cols)
+{
+ return Constant(rows, cols, Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero(Index size)
+{
+ return Constant(size, Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero()
+{
+ return Constant(Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
+{
+ typename internal::nested_eval<Derived,1>::type self(derived());
+ for(Index j = 0; j < cols(); ++j)
+ for(Index i = 0; i < rows(); ++i)
+ if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
+ return false;
+ return true;
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
+{
+ return setConstant(Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setZero(Index newSize)
+{
+ resize(newSize);
+ return setConstant(Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setZero(Index rows, Index cols)
+{
+ resize(rows, cols);
+ return setConstant(Scalar(0));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones(Index rows, Index cols)
+{
+ return Constant(rows, cols, Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones(Index newSize)
+{
+ return Constant(newSize, Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones()
+{
+ return Constant(Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
+(const RealScalar& prec) const
+{
+ return isApproxToConstant(Scalar(1), prec);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
+{
+ return setConstant(Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setOnes(Index newSize)
+{
+ resize(newSize);
+ return setConstant(Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
+{
+ resize(rows, cols);
+ return setConstant(Scalar(1));
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+MatrixBase<Derived>::Identity(Index rows, Index cols)
+{
+ return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+MatrixBase<Derived>::Identity()
+{
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());
+}
+template<typename Derived>
+bool MatrixBase<Derived>::isIdentity
+(const RealScalar& prec) const
+{
+ typename internal::nested_eval<Derived,1>::type self(derived());
+ for(Index j = 0; j < cols(); ++j)
+ {
+ for(Index i = 0; i < rows(); ++i)
+ {
+ if(i == j)
+ {
+ if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
+ return false;
+ }
+ else
+ {
+ if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+namespace internal {
+template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
+struct setIdentity_impl
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& m)
+ {
+ return m = Derived::Identity(m.rows(), m.cols());
+ }
+};
+template<typename Derived>
+struct setIdentity_impl<Derived, true>
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Derived& run(Derived& m)
+ {
+ m.setZero();
+ const Index size = numext::mini(m.rows(), m.cols());
+ for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
+ return m;
+ }
+};
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
+{
+ return internal::setIdentity_impl<Derived>::run(derived());
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
+{
+ derived().resize(rows, cols);
+ return setIdentity();
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
+{
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ return BasisReturnType(SquareMatrixType::Identity(),i);
+}
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
+{ return Derived::Unit(0); }
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
+{ return Derived::Unit(1); }
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
+{ return Derived::Unit(2); }
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
+{ return Derived::Unit(3); }
+}
+#endif
+// end #include "src/Core/CwiseNullaryOp.h"
+// #include "src/Core/Stride.h"
+#ifndef EIGEN_STRIDE_H
+#define EIGEN_STRIDE_H
+namespace Eigen {
+template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
+class Stride
+{
+ public:
+ typedef Eigen::Index Index;
+ enum {
+ InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
+ OuterStrideAtCompileTime = _OuterStrideAtCompileTime
+ };
+ EIGEN_DEVICE_FUNC
+ Stride()
+ : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
+ {
+ eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
+ }
+ EIGEN_DEVICE_FUNC
+ Stride(Index outerStride, Index innerStride)
+ : m_outer(outerStride), m_inner(innerStride)
+ {
+ eigen_assert(innerStride>=0 && outerStride>=0);
+ }
+ EIGEN_DEVICE_FUNC
+ Stride(const Stride& other)
+ : m_outer(other.outer()), m_inner(other.inner())
+ {}
+ EIGEN_DEVICE_FUNC
+ inline Index outer() const { return m_outer.value(); }
+ EIGEN_DEVICE_FUNC
+ inline Index inner() const { return m_inner.value(); }
+ protected:
+ internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
+ internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
+};
+template<int Value>
+class InnerStride : public Stride<0, Value>
+{
+ typedef Stride<0, Value> Base;
+ public:
+ EIGEN_DEVICE_FUNC InnerStride() : Base() {}
+ EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
+};
+template<int Value>
+class OuterStride : public Stride<Value, 0>
+{
+ typedef Stride<Value, 0> Base;
+ public:
+ EIGEN_DEVICE_FUNC OuterStride() : Base() {}
+ EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
+};
+}
+#endif
+// end #include "src/Core/Stride.h"
+// #include "src/Core/MapBase.h"
+#ifndef EIGEN_MAPBASE_H
+#define EIGEN_MAPBASE_H
+#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
+ EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
+namespace Eigen {
+template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
+ : public internal::dense_xpr_base<Derived>::type
+{
+ public:
+ typedef typename internal::dense_xpr_base<Derived>::type Base;
+ enum {
+ RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+ SizeAtCompileTime = Base::SizeAtCompileTime
+ };
+ typedef typename internal::traits<Derived>::StorageKind StorageKind;
+ typedef typename internal::traits<Derived>::Scalar Scalar;
+ typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef typename internal::conditional<
+ bool(internal::is_lvalue<Derived>::value),
+ Scalar *,
+ const Scalar *>::type
+ PointerType;
+ using Base::derived;
+ using Base::MaxRowsAtCompileTime;
+ using Base::MaxColsAtCompileTime;
+ using Base::MaxSizeAtCompileTime;
+ using Base::IsVectorAtCompileTime;
+ using Base::Flags;
+ using Base::IsRowMajor;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::lazyAssign;
+ using Base::eval;
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+ using Base::operator=;
+ typedef typename Base::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeff(Index rowId, Index colId) const
+ {
+ return m_data[colId * colStride() + rowId * rowStride()];
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeff(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return m_data[index * innerStride()];
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return this->m_data[colId * colStride() + rowId * rowStride()];
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return this->m_data[index * innerStride()];
+ }
+ template<int LoadMode>
+ inline PacketScalar packet(Index rowId, Index colId) const
+ {
+ return internal::ploadt<PacketScalar, LoadMode>
+ (m_data + (colId * colStride() + rowId * rowStride()));
+ }
+ template<int LoadMode>
+ inline PacketScalar packet(Index index) const
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
+ }
+ EIGEN_DEVICE_FUNC
+ explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+ {
+ EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+ checkSanity<Derived>();
+ }
+ EIGEN_DEVICE_FUNC
+ inline MapBase(PointerType dataPtr, Index vecSize)
+ : m_data(dataPtr),
+ m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
+ m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime))
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+ eigen_assert(vecSize >= 0);
+ eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
+ checkSanity<Derived>();
+ }
+ EIGEN_DEVICE_FUNC
+ inline MapBase(PointerType dataPtr, Index rows, Index cols)
+ : m_data(dataPtr), m_rows(rows), m_cols(cols)
+ {
+ eigen_assert( (dataPtr == 0)
+ || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+ && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
+ checkSanity<Derived>();
+ }
+ #ifdef EIGEN_MAPBASE_PLUGIN
+ #include EIGEN_MAPBASE_PLUGIN
+ #endif
+ protected:
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
+ {
+#if EIGEN_MAX_ALIGN_BYTES>0
+ eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
+ || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
+#endif
+ }
+ template<typename T>
+ EIGEN_DEVICE_FUNC
+ void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
+ {}
+ PointerType m_data;
+ const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+ const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+};
+template<typename Derived> class MapBase<Derived, WriteAccessors>
+ : public MapBase<Derived, ReadOnlyAccessors>
+{
+ typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
+ public:
+ typedef MapBase<Derived, ReadOnlyAccessors> Base;
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::PacketScalar PacketScalar;
+ typedef typename Base::StorageIndex StorageIndex;
+ typedef typename Base::PointerType PointerType;
+ using Base::derived;
+ using Base::rows;
+ using Base::cols;
+ using Base::size;
+ using Base::coeff;
+ using Base::coeffRef;
+ using Base::innerStride;
+ using Base::outerStride;
+ using Base::rowStride;
+ using Base::colStride;
+ typedef typename internal::conditional<
+ internal::is_lvalue<Derived>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return this->m_data; }
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return this->m_data; }
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
+ {
+ return this->m_data[col * colStride() + row * rowStride()];
+ }
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ return this->m_data[index * innerStride()];
+ }
+ template<int StoreMode>
+ inline void writePacket(Index row, Index col, const PacketScalar& val)
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>
+ (this->m_data + (col * colStride() + row * rowStride()), val);
+ }
+ template<int StoreMode>
+ inline void writePacket(Index index, const PacketScalar& val)
+ {
+ EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+ internal::pstoret<Scalar, PacketScalar, StoreMode>
+ (this->m_data + index * innerStride(), val);
+ }
+ EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+ EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+ EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
+ EIGEN_DEVICE_FUNC
+ Derived& operator=(const MapBase& other)
+ {
+ ReadOnlyMapBase::Base::operator=(other);
+ return derived();
+ }
+ using ReadOnlyMapBase::Base::operator=;
+};
+#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
+}
+#endif
+// end #include "src/Core/MapBase.h"
+// #include "src/Core/Map.h"
+#ifndef EIGEN_MAP_H
+#define EIGEN_MAP_H
+namespace Eigen {
+namespace internal {
+template<typename PlainObjectType, int MapOptions, typename StrideType>
+struct traits<Map<PlainObjectType, MapOptions, StrideType> >
+ : public traits<PlainObjectType>
+{
+ typedef traits<PlainObjectType> TraitsBase;
+ enum {
+ InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+ ? int(PlainObjectType::InnerStrideAtCompileTime)
+ : int(StrideType::InnerStrideAtCompileTime),
+ OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+ ? int(PlainObjectType::OuterStrideAtCompileTime)
+ : int(StrideType::OuterStrideAtCompileTime),
+ Alignment = int(MapOptions)&int(AlignedMask),
+ Flags0 = TraitsBase::Flags & (~NestByRefBit),
+ Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
+ };
+private:
+  enum { Options }; // Expressions don't have Options; this private enum hides the one inherited from PlainObjectType's traits
+};
+}
+template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
+ : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
+{
+ public:
+ typedef MapBase<Map> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Map)
+ typedef typename Base::PointerType PointerType;
+ typedef PointerType PointerArgType;
+ EIGEN_DEVICE_FUNC
+ inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
+ : IsVectorAtCompileTime ? this->size()
+ : int(Flags)&RowMajorBit ? this->cols()
+ : this->rows();
+ }
+ EIGEN_DEVICE_FUNC
+ explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
+ : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
+ {
+ PlainObjectType::Base::_check_template_params();
+ }
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
+ protected:
+ StrideType m_stride;
+};
+}
+#endif
+// end #include "src/Core/Map.h"
+// #include "src/Core/Block.h"
+#ifndef EIGEN_BLOCK_H
+#define EIGEN_BLOCK_H
+namespace Eigen {
+namespace internal {
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
+{
+ typedef typename traits<XprType>::Scalar Scalar;
+ typedef typename traits<XprType>::StorageKind StorageKind;
+ typedef typename traits<XprType>::XprKind XprKind;
+ typedef typename ref_selector<XprType>::type XprTypeNested;
+ typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+ enum{
+ MatrixRows = traits<XprType>::RowsAtCompileTime,
+ MatrixCols = traits<XprType>::ColsAtCompileTime,
+ RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
+ ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
+ MaxRowsAtCompileTime = BlockRows==0 ? 0
+ : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
+ : int(traits<XprType>::MaxRowsAtCompileTime),
+ MaxColsAtCompileTime = BlockCols==0 ? 0
+ : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
+ : int(traits<XprType>::MaxColsAtCompileTime),
+ XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
+ IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+ : XprTypeIsRowMajor,
+ HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
+ InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+ InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
+ ? int(inner_stride_at_compile_time<XprType>::ret)
+ : int(outer_stride_at_compile_time<XprType>::ret),
+ OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
+ ? int(outer_stride_at_compile_time<XprType>::ret)
+ : int(inner_stride_at_compile_time<XprType>::ret),
+ FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
+ FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
+ Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
+ Alignment = 0
+ };
+};
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
+ bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
+}
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
+ : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
+{
+ typedef BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> Impl;
+ public:
+ typedef Impl Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
+ typedef typename internal::remove_all<XprType>::type NestedExpression;
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr, Index i) : Impl(xpr,i)
+ {
+ eigen_assert( (i>=0) && (
+ ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
+ ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
+ }
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr, Index startRow, Index startCol)
+ : Impl(xpr, startRow, startCol)
+ {
+ EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
+ eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
+ && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
+ }
+ EIGEN_DEVICE_FUNC
+ inline Block(XprType& xpr,
+ Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+ : Impl(xpr, startRow, startCol, blockRows, blockCols)
+ {
+ eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
+ && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
+ eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows
+ && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
+ }
+};
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
+ : public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
+{
+ typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
+ typedef typename XprType::StorageIndex StorageIndex;
+ public:
+ typedef Impl Base;
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
+ EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
+ EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+ : Impl(xpr, startRow, startCol, blockRows, blockCols) {}
+};
+namespace internal {
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class BlockImpl_dense
+ : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
+{
+ typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
+ public:
+ typedef typename internal::dense_xpr_base<BlockType>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index i)
+ : m_xpr(xpr),
+ m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
+ m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
+ m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
+ m_blockCols(BlockCols==1 ? 1 : xpr.cols())
+ {}
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+ : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
+ m_blockRows(BlockRows), m_blockCols(BlockCols)
+ {}
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr,
+ Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+ : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
+ m_blockRows(blockRows), m_blockCols(blockCols)
+ {}
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index rowId, Index colId)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(XprType)
+ return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
+ {
+ return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+ EIGEN_DEVICE_FUNC
+ inline Scalar& coeffRef(Index index)
+ {
+ EIGEN_STATIC_ASSERT_LVALUE(XprType)
+ return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+ EIGEN_DEVICE_FUNC
+ inline const CoeffReturnType coeff(Index index) const
+ {
+ return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+ template<int LoadMode>
+ inline PacketScalar packet(Index rowId, Index colId) const
+ {
+ return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
+ }
+ template<int LoadMode>
+ inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+ {
+ m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
+ }
+ template<int LoadMode>
+ inline PacketScalar packet(Index index) const
+ {
+ return m_xpr.template packet<Unaligned>
+ (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ }
+ template<int LoadMode>
+ inline void writePacket(Index index, const PacketScalar& val)
+ {
+ m_xpr.template writePacket<Unaligned>
+ (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+ m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
+ }
+ #ifdef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const;
+ EIGEN_DEVICE_FUNC inline Index innerStride() const;
+ EIGEN_DEVICE_FUNC inline Index outerStride() const;
+ #endif
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
+ {
+ return m_xpr;
+ }
+ EIGEN_DEVICE_FUNC
+ XprType& nestedExpression() { return m_xpr; }
+ EIGEN_DEVICE_FUNC
+ StorageIndex startRow() const
+ {
+ return m_startRow.value();
+ }
+ EIGEN_DEVICE_FUNC
+ StorageIndex startCol() const
+ {
+ return m_startCol.value();
+ }
+ protected:
+ XprTypeNested m_xpr;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+ const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
+ const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
+};
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
+ : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
+{
+ typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+ typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
+ enum {
+ XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
+ };
+ public:
+ typedef MapBase<BlockType> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index i)
+ : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
+ || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
+ BlockRows==1 ? 1 : xpr.rows(),
+ BlockCols==1 ? 1 : xpr.cols()),
+ m_xpr(xpr),
+ m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
+ m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
+ {
+ init();
+ }
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+ : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
+ m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
+ {
+ init();
+ }
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr,
+ Index startRow, Index startCol,
+ Index blockRows, Index blockCols)
+ : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
+ m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
+ {
+ init();
+ }
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
+ {
+ return m_xpr;
+ }
+ EIGEN_DEVICE_FUNC
+ XprType& nestedExpression() { return m_xpr; }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const
+ {
+ return internal::traits<BlockType>::HasSameStorageOrderAsXprType
+ ? m_xpr.innerStride()
+ : m_xpr.outerStride();
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const
+ {
+ return m_outerStride;
+ }
+ EIGEN_DEVICE_FUNC
+ StorageIndex startRow() const
+ {
+ return m_startRow.value();
+ }
+ EIGEN_DEVICE_FUNC
+ StorageIndex startCol() const
+ {
+ return m_startCol.value();
+ }
+ #ifndef __SUNPRO_CC
+ protected:
+ #endif
+ #ifndef EIGEN_PARSED_BY_DOXYGEN
+ EIGEN_DEVICE_FUNC
+ inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
+ : Base(data, blockRows, blockCols), m_xpr(xpr)
+ {
+ init();
+ }
+ #endif
+ protected:
+ EIGEN_DEVICE_FUNC
+ void init()
+ {
+ m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
+ ? m_xpr.outerStride()
+ : m_xpr.innerStride();
+ }
+ XprTypeNested m_xpr;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+ const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+ Index m_outerStride;
+};
+}
+}
+#endif
+// end #include "src/Core/Block.h"
+// #include "src/Core/Transpose.h"
+#ifndef EIGEN_TRANSPOSE_H
+#define EIGEN_TRANSPOSE_H
+namespace Eigen {
+namespace internal {
+template<typename MatrixType>
+struct traits<Transpose<MatrixType> > : public traits<MatrixType>
+{
+ typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
+ enum {
+ RowsAtCompileTime = MatrixType::ColsAtCompileTime,
+ ColsAtCompileTime = MatrixType::RowsAtCompileTime,
+ MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+ MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+ FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags0 = traits<MatrixTypeNestedPlain>::Flags & ~(LvalueBit | NestByRefBit),
+ Flags1 = Flags0 | FlagsLvalueBit,
+    Flags = Flags1 ^ RowMajorBit, // XOR-ing RowMajorBit flips the storage order of the transposed expression
+ InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
+ OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
+ };
+};
+}
+template<typename MatrixType, typename StorageKind> class TransposeImpl;
+template<typename MatrixType> class Transpose
+ : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
+{
+ public:
+ typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
+ typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
+ EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+ EIGEN_DEVICE_FUNC
+ explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<MatrixTypeNested>::type&
+ nestedExpression() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC
+ typename internal::remove_reference<MatrixTypeNested>::type&
+ nestedExpression() { return m_matrix; }
+ void resize(Index nrows, Index ncols) {
+ m_matrix.resize(ncols,nrows);
+ }
+ protected:
+ typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
+};
+namespace internal {
+template<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>
+struct TransposeImpl_base
+{
+ typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
+};
+template<typename MatrixType>
+struct TransposeImpl_base<MatrixType, false>
+{
+ typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
+};
+}
+template<typename XprType, typename StorageKind>
+class TransposeImpl
+ : public internal::generic_xpr_base<Transpose<XprType> >::type
+{
+public:
+ typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
+};
+template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
+ : public internal::TransposeImpl_base<MatrixType>::type
+{
+ public:
+ typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
+ using Base::coeffRef;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
+ EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
+ EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+ typedef typename internal::conditional<
+ internal::is_lvalue<MatrixType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+ EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
+ EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return derived().nestedExpression().coeffRef(colId, rowId);
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return derived().nestedExpression().coeffRef(index);
+ }
+};
+template<typename Derived>
+inline Transpose<Derived>
+DenseBase<Derived>::transpose()
+{
+ return TransposeReturnType(derived());
+}
+template<typename Derived>
+inline typename DenseBase<Derived>::ConstTransposeReturnType
+DenseBase<Derived>::transpose() const
+{
+ return ConstTransposeReturnType(derived());
+}
+template<typename Derived>
+inline const typename MatrixBase<Derived>::AdjointReturnType
+MatrixBase<Derived>::adjoint() const
+{
+ return AdjointReturnType(this->transpose());
+}
+namespace internal {
+template<typename MatrixType,
+ bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
+ bool MatchPacketSize =
+ (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
+ && (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
+struct inplace_transpose_selector;
+template<typename MatrixType>
+struct inplace_transpose_selector<MatrixType,true,false> {
+ static void run(MatrixType& m) {
+ m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+ }
+};
+template<typename MatrixType>
+struct inplace_transpose_selector<MatrixType,true,true> {
+ static void run(MatrixType& m) {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
+ const Index Alignment = internal::evaluator<MatrixType>::Alignment;
+ PacketBlock<Packet> A;
+ for (Index i=0; i<PacketSize; ++i)
+ A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
+ internal::ptranspose(A);
+ for (Index i=0; i<PacketSize; ++i)
+ m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
+ }
+};
+template<typename MatrixType,bool MatchPacketSize>
+struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> {
+ static void run(MatrixType& m) {
+ if (m.rows()==m.cols())
+ m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+ else
+ m = m.transpose().eval();
+ }
+};
+}
+template<typename Derived>
+inline void DenseBase<Derived>::transposeInPlace()
+{
+ eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
+ && "transposeInPlace() called on a non-square non-resizable matrix");
+ internal::inplace_transpose_selector<Derived>::run(derived());
+}
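+// Note (illustrative): transposeInPlace() dispatches through
+// inplace_transpose_selector above: fixed-size square matrices swap the
+// strictly upper triangle with the transposed lower one (or use a single
+// packet-level ptranspose when the matrix matches the packet size), while
+// dynamic matrices that happen to be non-square are resized by evaluating
+// the transpose into a temporary.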
+template<typename Derived>
+inline void MatrixBase<Derived>::adjointInPlace()
+{
+ derived() = adjoint().eval();
+}
+#ifndef EIGEN_NO_DEBUG
+namespace internal {
+template<bool DestIsTransposed, typename OtherDerived>
+struct check_transpose_aliasing_compile_time_selector
+{
+ enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
+};
+template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
+struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
+{
+ enum { ret = bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
+ || bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
+ };
+};
+template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
+struct check_transpose_aliasing_run_time_selector
+{
+ static bool run(const Scalar* dest, const OtherDerived& src)
+ {
+ return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src));
+ }
+};
+template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
+struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
+{
+ static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
+ {
+ return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.lhs())))
+ || ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.rhs())));
+ }
+};
+template<typename Derived, typename OtherDerived,
+ bool MightHaveTransposeAliasing
+ = check_transpose_aliasing_compile_time_selector
+ <blas_traits<Derived>::IsTransposed,OtherDerived>::ret
+ >
+struct checkTransposeAliasing_impl
+{
+ static void run(const Derived& dst, const OtherDerived& other)
+ {
+ eigen_assert((!check_transpose_aliasing_run_time_selector
+ <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
+ ::run(extract_data(dst), other))
+ && "aliasing detected during transposition, use transposeInPlace() "
+ "or evaluate the rhs into a temporary using .eval()");
+ }
+};
+template<typename Derived, typename OtherDerived>
+struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
+{
+ static void run(const Derived&, const OtherDerived&)
+ {
+ }
+};
+template<typename Dst, typename Src>
+void check_for_aliasing(const Dst &dst, const Src &src)
+{
+ internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
+}
+}
+#endif
+}
+#endif
+// end #include "src/Core/Transpose.h"
+// #include "src/Core/Redux.h"
+#ifndef EIGEN_REDUX_H
+#define EIGEN_REDUX_H
+namespace Eigen {
+namespace internal {
+template<typename Func, typename Derived>
+struct redux_traits
+{
+public:
+ typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
+ enum {
+ PacketSize = unpacket_traits<PacketType>::size,
+ InnerMaxSize = int(Derived::IsRowMajor)
+ ? Derived::MaxColsAtCompileTime
+ : Derived::MaxRowsAtCompileTime
+ };
+ enum {
+ MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
+ && (functor_traits<Func>::PacketAccess),
+ MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
+ MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
+ };
+public:
+ enum {
+ Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+ : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
+ : int(DefaultTraversal)
+ };
+public:
+ enum {
+ Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
+ : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
+ UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
+ };
+public:
+ enum {
+ Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
+ };
+#ifdef EIGEN_DEBUG_ASSIGN
+ static void debug()
+ {
+ std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
+ std::cerr.setf(std::ios::hex, std::ios::basefield);
+ EIGEN_DEBUG_VAR(Derived::Flags)
+ std::cerr.unsetf(std::ios::hex);
+ EIGEN_DEBUG_VAR(InnerMaxSize)
+ EIGEN_DEBUG_VAR(PacketSize)
+ EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearVectorize)
+ EIGEN_DEBUG_VAR(MaySliceVectorize)
+ EIGEN_DEBUG_VAR(Traversal)
+ EIGEN_DEBUG_VAR(UnrollingLimit)
+ EIGEN_DEBUG_VAR(Unrolling)
+ std::cerr << std::endl;
+ }
+#endif
+};
+template<typename Func, typename Derived, int Start, int Length>
+struct redux_novec_unroller
+{
+ enum {
+ HalfLength = Length/2
+ };
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+ {
+ return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+ redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
+ }
+};
+template<typename Func, typename Derived, int Start>
+struct redux_novec_unroller<Func, Derived, Start, 1>
+{
+ enum {
+ outer = Start / Derived::InnerSizeAtCompileTime,
+ inner = Start % Derived::InnerSizeAtCompileTime
+ };
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
+ {
+ return mat.coeffByOuterInner(outer, inner);
+ }
+};
+template<typename Func, typename Derived, int Start>
+struct redux_novec_unroller<Func, Derived, Start, 0>
+{
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
+};
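+// Note (illustrative): rather than folding coefficients one by one, the
+// unroller above recursively splits the range in half, so a fixed-size
+// reduction compiles into a balanced tree of depth log2(Size). This shortens
+// the dependency chain compared to a linear left fold.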
+template<typename Func, typename Derived, int Start, int Length>
+struct redux_vec_unroller
+{
+ enum {
+ PacketSize = redux_traits<Func, Derived>::PacketSize,
+ HalfLength = Length/2
+ };
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+ static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
+ {
+ return func.packetOp(
+ redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+ redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
+ }
+};
+template<typename Func, typename Derived, int Start>
+struct redux_vec_unroller<Func, Derived, Start, 1>
+{
+ enum {
+ index = Start * redux_traits<Func, Derived>::PacketSize,
+ outer = index / int(Derived::InnerSizeAtCompileTime),
+ inner = index % int(Derived::InnerSizeAtCompileTime),
+ alignment = Derived::Alignment
+ };
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+ static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
+ {
+ return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
+ }
+};
+template<typename Func, typename Derived,
+ int Traversal = redux_traits<Func, Derived>::Traversal,
+ int Unrolling = redux_traits<Func, Derived>::Unrolling
+>
+struct redux_impl;
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ Scalar res;
+ res = mat.coeffByOuterInner(0, 0);
+ for(Index i = 1; i < mat.innerSize(); ++i)
+ res = func(res, mat.coeffByOuterInner(0, i));
+ for(Index i = 1; i < mat.outerSize(); ++i)
+ for(Index j = 0; j < mat.innerSize(); ++j)
+ res = func(res, mat.coeffByOuterInner(i, j));
+ return res;
+ }
+};
+template<typename Func, typename Derived>
+struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
+ : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
+{};
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+ static Scalar run(const Derived &mat, const Func& func)
+ {
+ const Index size = mat.size();
+ const Index packetSize = redux_traits<Func, Derived>::PacketSize;
+ const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
+ enum {
+ alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
+ alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
+ };
+ const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
+ const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
+ const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
+ const Index alignedEnd2 = alignedStart + alignedSize2;
+ const Index alignedEnd = alignedStart + alignedSize;
+ Scalar res;
+ if(alignedSize)
+ {
+ PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
+ if(alignedSize>packetSize)
+ {
+ PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
+ for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
+ {
+ packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
+ packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
+ }
+ packet_res0 = func.packetOp(packet_res0,packet_res1);
+ if(alignedEnd>alignedEnd2)
+ packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
+ }
+ res = func.predux(packet_res0);
+ for(Index index = 0; index < alignedStart; ++index)
+ res = func(res,mat.coeff(index));
+ for(Index index = alignedEnd; index < size; ++index)
+ res = func(res,mat.coeff(index));
+ }
+ else
+ {
+ res = mat.coeff(0);
+ for(Index index = 1; index < size; ++index)
+ res = func(res,mat.coeff(index));
+ }
+ return res;
+ }
+};
+template<typename Func, typename Derived, int Unrolling>
+struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketType;
+ EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ const Index innerSize = mat.innerSize();
+ const Index outerSize = mat.outerSize();
+ enum {
+ packetSize = redux_traits<Func, Derived>::PacketSize
+ };
+ const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
+ Scalar res;
+ if(packetedInnerSize)
+ {
+ PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
+ for(Index j=0; j<outerSize; ++j)
+ for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
+ packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
+ res = func.predux(packet_res);
+ for(Index j=0; j<outerSize; ++j)
+ for(Index i=packetedInnerSize; i<innerSize; ++i)
+ res = func(res, mat.coeffByOuterInner(j,i));
+ }
+ else
+ {
+ res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
+ }
+ return res;
+ }
+};
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
+{
+ typedef typename Derived::Scalar Scalar;
+ typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+ enum {
+ PacketSize = redux_traits<Func, Derived>::PacketSize,
+ Size = Derived::SizeAtCompileTime,
+ VectorizedSize = (Size / PacketSize) * PacketSize
+ };
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+ {
+ eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+ if (VectorizedSize > 0) {
+ Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
+ if (VectorizedSize != Size)
+ res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
+ return res;
+ }
+ else {
+ return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
+ }
+ }
+};
+template<typename _XprType>
+class redux_evaluator
+{
+public:
+ typedef _XprType XprType;
+ EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename XprType::PacketScalar PacketScalar;
+ typedef typename XprType::PacketReturnType PacketReturnType;
+ enum {
+ MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
+ Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
+ IsRowMajor = XprType::IsRowMajor,
+ SizeAtCompileTime = XprType::SizeAtCompileTime,
+ InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
+ CoeffReadCost = evaluator<XprType>::CoeffReadCost,
+ Alignment = evaluator<XprType>::Alignment
+ };
+ EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+ EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
+ EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
+ EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeff(Index row, Index col) const
+ { return m_evaluator.coeff(row, col); }
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeff(Index index) const
+ { return m_evaluator.coeff(index); }
+ template<int LoadMode, typename PacketType>
+ PacketType packet(Index row, Index col) const
+ { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
+ template<int LoadMode, typename PacketType>
+ PacketType packet(Index index) const
+ { return m_evaluator.template packet<LoadMode,PacketType>(index); }
+ EIGEN_DEVICE_FUNC
+ CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+ { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+ template<int LoadMode, typename PacketType>
+ PacketType packetByOuterInner(Index outer, Index inner) const
+ { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+ const XprType & nestedExpression() const { return m_xpr; }
+protected:
+ internal::evaluator<XprType> m_evaluator;
+ const XprType &m_xpr;
+};
+}
+template<typename Derived>
+template<typename Func>
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::redux(const Func& func) const
+{
+ eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
+ typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
+ ThisEvaluator thisEval(derived());
+ return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
+}
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::minCoeff() const
+{
+ return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
+}
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::maxCoeff() const
+{
+ return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
+}
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::sum() const
+{
+ if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
+ return Scalar(0);
+ return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
+}
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::mean() const
+{
+#ifdef __INTEL_COMPILER
+ #pragma warning push
+ #pragma warning ( disable : 2259 )
+#endif
+ return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
+#ifdef __INTEL_COMPILER
+ #pragma warning pop
+#endif
+}
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::prod() const
+{
+ if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
+ return Scalar(1);
+ return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
+}
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+MatrixBase<Derived>::trace() const
+{
+ return derived().diagonal().sum();
+}
+}
+#endif
+// end #include "src/Core/Redux.h"
+// #include "src/Core/GeneralProduct.h"
+#ifndef EIGEN_GENERAL_PRODUCT_H
+#define EIGEN_GENERAL_PRODUCT_H
+namespace Eigen {
+enum {
+ Large = 2,
+ Small = 3
+};
+namespace internal {
+template<int Rows, int Cols, int Depth> struct product_type_selector;
+template<int Size, int MaxSize> struct product_size_category
+{
+ enum { is_large = MaxSize == Dynamic ||
+ Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+ (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+ value = is_large ? Large
+ : Size == 1 ? 1
+ : Small
+ };
+};
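+// Note (illustrative): product_size_category grades each product dimension as
+// 1, Small or Large; a dimension is Large when it is, or may grow to be, at
+// least EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD. For example, a Matrix4f times
+// Matrix4f product is (Small, Small, Small) and resolves below to
+// CoeffBasedProductMode, whereas MatrixXf times MatrixXf is
+// (Large, Large, Large) and resolves to GemmProduct.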
+template<typename Lhs, typename Rhs> struct product_type
+{
+ typedef typename remove_all<Lhs>::type _Lhs;
+ typedef typename remove_all<Rhs>::type _Rhs;
+ enum {
+ MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
+ Rows = traits<_Lhs>::RowsAtCompileTime,
+ MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
+ Cols = traits<_Rhs>::ColsAtCompileTime,
+ MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
+ traits<_Rhs>::MaxRowsAtCompileTime),
+ Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
+ traits<_Rhs>::RowsAtCompileTime)
+ };
+private:
+ enum {
+ rows_select = product_size_category<Rows,MaxRows>::value,
+ cols_select = product_size_category<Cols,MaxCols>::value,
+ depth_select = product_size_category<Depth,MaxDepth>::value
+ };
+ typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+public:
+ enum {
+ value = selector::ret,
+ ret = selector::ret
+ };
+#ifdef EIGEN_DEBUG_PRODUCT
+ static void debug()
+ {
+ EIGEN_DEBUG_VAR(Rows);
+ EIGEN_DEBUG_VAR(Cols);
+ EIGEN_DEBUG_VAR(Depth);
+ EIGEN_DEBUG_VAR(rows_select);
+ EIGEN_DEBUG_VAR(cols_select);
+ EIGEN_DEBUG_VAR(depth_select);
+ EIGEN_DEBUG_VAR(value);
+ }
+#endif
+};
+template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
+template<int M> struct product_type_selector<M, 1, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<int N> struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
+template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
+template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
+template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
+template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
+template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
+template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
+}
+namespace internal {
+template<int Side, int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector;
+}
+namespace internal {
+template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
+{
+ EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
+};
+template<typename Scalar,int Size>
+struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
+{
+ EIGEN_STRONG_INLINE Scalar* data() { return 0; }
+};
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
+{
+ enum {
+ ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
+ PacketSize = internal::packet_traits<Scalar>::size
+ };
+ #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
+ EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
+ #else
+ internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
+ EIGEN_STRONG_INLINE Scalar* data() {
+ return ForceAlignment
+ ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
+ : m_data.array;
+ }
+ #endif
+};
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ Transpose<Dest> destT(dest);
+ enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+ gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+ ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
+ }
+};
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar ResScalar;
+ typedef typename Dest::RealScalar RealScalar;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
+ ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
+ ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+ * RhsBlasTraits::extractScalarFactor(rhs);
+ typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
+ enum {
+ EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
+ ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+ MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
+ };
+ typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
+ RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+ if(!MightCannotUseDest)
+ {
+ general_matrix_vector_product
+ <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+ actualLhs.rows(), actualLhs.cols(),
+ LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+ RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+ dest.data(), 1,
+ compatibleAlpha);
+ }
+ else
+ {
+ gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+ const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
+ const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+ ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+ evalToDest ? dest.data() : static_dest.data());
+ if(!evalToDest)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ Index size = dest.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ if(!alphaIsCompatible)
+ {
+ MappedDest(actualDestPtr, dest.size()).setZero();
+ compatibleAlpha = RhsScalar(1);
+ }
+ else
+ MappedDest(actualDestPtr, dest.size()) = dest;
+ }
+ general_matrix_vector_product
+ <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+ actualLhs.rows(), actualLhs.cols(),
+ LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+ RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+ actualDestPtr, 1,
+ compatibleAlpha);
+ if (!evalToDest)
+ {
+ if(!alphaIsCompatible)
+ dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
+ else
+ dest = MappedDest(actualDestPtr, dest.size());
+ }
+ }
+ }
+};
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar ResScalar;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+ typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+ typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+ ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+ * RhsBlasTraits::extractScalarFactor(rhs);
+ enum {
+ DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
+ };
+ gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+ ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+ DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+ if(!DirectlyUseRhs)
+ {
+ #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ Index size = actualRhs.size();
+ EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+ #endif
+ Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+ }
+ typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
+ general_matrix_vector_product
+ <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+ actualLhs.rows(), actualLhs.cols(),
+ LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+ RhsMapper(actualRhsPtr, 1),
+ dest.data(), dest.col(0).innerStride(),
+ actualAlpha);
+ }
+};
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+ typename nested_eval<Rhs,1>::type actual_rhs(rhs);
+ const Index size = rhs.rows();
+ for(Index k=0; k<size; ++k)
+ dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
+ }
+};
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
+{
+ template<typename Lhs, typename Rhs, typename Dest>
+ static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+ {
+ EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+ typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
+ const Index rows = dest.rows();
+ for(Index i=0; i<rows; ++i)
+ dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
+ }
+};
+}
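+// Note (illustrative): the gemv_dense_selector specializations above choose
+// the matrix-vector kernel by storage order. A left-sided product is
+// rewritten as the transposed right-sided one; the ColMajor path may stage
+// the destination in an aligned temporary when it cannot write to dest
+// directly, and the RowMajor path may likewise stage the right-hand side.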
+#ifndef __CUDACC__
+template<typename Derived>
+template<typename OtherDerived>
+inline const Product<Derived, OtherDerived>
+MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
+{
+ enum {
+ ProductIsValid = Derived::ColsAtCompileTime==Dynamic
+ || OtherDerived::RowsAtCompileTime==Dynamic
+ || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+ AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+ SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ };
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+ EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+#ifdef EIGEN_DEBUG_PRODUCT
+ internal::product_type<Derived,OtherDerived>::debug();
+#endif
+ return Product<Derived, OtherDerived>(derived(), other.derived());
+}
+#endif
+template<typename Derived>
+template<typename OtherDerived>
+const Product<Derived,OtherDerived,LazyProduct>
+MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
+{
+ enum {
+ ProductIsValid = Derived::ColsAtCompileTime==Dynamic
+ || OtherDerived::RowsAtCompileTime==Dynamic
+ || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+ AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+ SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+ };
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+ EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+ EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+ return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
+}
+}
+#endif
+// end #include "src/Core/GeneralProduct.h"
+// #include "src/Core/products/GeneralBlockPanelKernel.h"
+#ifndef EIGEN_GENERAL_BLOCK_PANEL_H
+#define EIGEN_GENERAL_BLOCK_PANEL_H
+namespace Eigen {
+namespace internal {
+template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
+class gebp_traits;
+inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
+{
+ return a<=0 ? b : a;
+}
+#if EIGEN_ARCH_i386_OR_x86_64
+const std::ptrdiff_t defaultL1CacheSize = 32*1024;
+const std::ptrdiff_t defaultL2CacheSize = 256*1024;
+const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
+#else
+const std::ptrdiff_t defaultL1CacheSize = 16*1024;
+const std::ptrdiff_t defaultL2CacheSize = 512*1024;
+const std::ptrdiff_t defaultL3CacheSize = 512*1024;
+#endif
+struct CacheSizes {
+ CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) {
+ int l1CacheSize, l2CacheSize, l3CacheSize;
+ queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
+ m_l1 = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
+ m_l2 = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
+ m_l3 = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
+ }
+ std::ptrdiff_t m_l1;
+ std::ptrdiff_t m_l2;
+ std::ptrdiff_t m_l3;
+};
+inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
+{
+ static CacheSizes m_cacheSizes;
+ if(action==SetAction)
+ {
+ eigen_internal_assert(l1!=0 && l2!=0);
+ m_cacheSizes.m_l1 = *l1;
+ m_cacheSizes.m_l2 = *l2;
+ m_cacheSizes.m_l3 = *l3;
+ }
+ else if(action==GetAction)
+ {
+ eigen_internal_assert(l1!=0 && l2!=0);
+ *l1 = m_cacheSizes.m_l1;
+ *l2 = m_cacheSizes.m_l2;
+ *l3 = m_cacheSizes.m_l3;
+ }
+ else
+ {
+ eigen_internal_assert(false);
+ }
+}
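+// Note (illustrative): manage_caching_sizes() queries the CPU cache sizes
+// once, on first use, and keeps them in a function-local static; detection
+// results that come back as zero or negative fall back to the
+// per-architecture defaults above. SetAction lets callers override the
+// values, GetAction reads them back.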
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
+void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ std::ptrdiff_t l1, l2, l3;
+ manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ if (num_threads > 1) {
+ typedef typename Traits::ResScalar ResScalar;
+ enum {
+ kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
+ ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
+ kr = 8,
+ mr = Traits::mr,
+ nr = Traits::nr
+ };
+ const Index k_cache = (numext::mini<Index>)((l1-ksub)/kdiv, 320);
+ if (k_cache < k) {
+ k = k_cache - (k_cache % kr);
+ eigen_internal_assert(k > 0);
+ }
+ const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
+ const Index n_per_thread = numext::div_ceil(n, num_threads);
+ if (n_cache <= n_per_thread) {
+ eigen_internal_assert(n_cache >= static_cast<Index>(nr));
+ n = n_cache - (n_cache % nr);
+ eigen_internal_assert(n > 0);
+ } else {
+ n = (numext::mini<Index>)(n, (n_per_thread + nr - 1) - ((n_per_thread + nr - 1) % nr));
+ }
+ if (l3 > l2) {
+ const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
+ const Index m_per_thread = numext::div_ceil(m, num_threads);
+ if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
+ m = m_cache - (m_cache % mr);
+ eigen_internal_assert(m > 0);
+ } else {
+ m = (numext::mini<Index>)(m, (m_per_thread + mr - 1) - ((m_per_thread + mr - 1) % mr));
+ }
+ }
+ }
+ else {
+#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+ l1 = 9*1024;
+ l2 = 32*1024;
+ l3 = 512*1024;
+#endif
+ if((numext::maxi)(k,(numext::maxi)(m,n))<48)
+ return;
+ typedef typename Traits::ResScalar ResScalar;
+ enum {
+ k_peeling = 8,
+ k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
+ k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
+ };
+ const Index max_kc = numext::maxi<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
+ const Index old_k = k;
+ if(k>max_kc)
+ {
+ k = (k%max_kc)==0 ? max_kc
+ : max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));
+ eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && "the number of sweeps has to remain the same");
+ }
+ #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+ const Index actual_l2 = l3;
+ #else
+ const Index actual_l2 = 1572864;
+ #endif
+ Index max_nc;
+ const Index lhs_bytes = m * k * sizeof(LhsScalar);
+ const Index remaining_l1 = l1 - k_sub - lhs_bytes;
+ if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
+ {
+ max_nc = remaining_l1 / (k*sizeof(RhsScalar));
+ }
+ else
+ {
+ max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
+ }
+ Index nc = numext::mini<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
+ if(n>nc)
+ {
+ n = (n%nc)==0 ? nc
+ : (nc - Traits::nr * ((nc-(n%nc))/(Traits::nr*(n/nc+1))));
+ }
+ else if(old_k==k)
+ {
+ Index problem_size = k*n*sizeof(LhsScalar);
+ Index actual_lm = actual_l2;
+ Index max_mc = m;
+ if(problem_size<=1024)
+ {
+ actual_lm = l1;
+ }
+ else if(l3!=0 && problem_size<=32768)
+ {
+ actual_lm = l2;
+ max_mc = (numext::mini<Index>)(576,max_mc);
+ }
+ Index mc = (numext::mini<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
+ if (mc > Traits::mr) mc -= mc % Traits::mr;
+ else if (mc==0) return;
+ m = (m%mc)==0 ? mc
+ : (mc - Traits::mr * ((mc-(m%mc))/(Traits::mr*(m/mc+1))));
+ }
+ }
+}
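+// Test hook: when EIGEN_TEST_SPECIFIC_BLOCKING_SIZES is defined, k, m and n
+// are clamped to the requested maxima instead of running the heuristic above.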
+template <typename Index>
+inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
+{
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+ if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
+ k = numext::mini<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
+ m = numext::mini<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
+ n = numext::mini<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
+ return true;
+ }
+#else
+ EIGEN_UNUSED_VARIABLE(k)
+ EIGEN_UNUSED_VARIABLE(m)
+ EIGEN_UNUSED_VARIABLE(n)
+#endif
+ return false;
+}
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
+void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ if (!useSpecificBlockingSizes(k, m, n)) {
+ evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);
+ }
+}
+template<typename LhsScalar, typename RhsScalar, typename Index>
+inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
+}
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+ #define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
+#else
+ template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
+ EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& )
+ {
+ c = cj.pmadd(a,b,c);
+ }
+ };
+ template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
+ EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
+ {
+ t = b; t = cj.pmul(a,t); c = padd(c,t);
+ }
+ };
+ template<typename CJ, typename A, typename B, typename C, typename T>
+ EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
+ {
+ gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
+ }
+ #define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
+#endif
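+// gebp_traits describes the register-level micro kernel for a given pair of
+// scalar types: the packet types, the mr x nr micro-panel shape, and how to
+// load, multiply-accumulate (madd) and scale-accumulate (acc) packets.
+// The specializations below cover the mixed real/complex cases.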
+template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs>
+class gebp_traits
+{
+public:
+ typedef _LhsScalar LhsScalar;
+ typedef _RhsScalar RhsScalar;
+ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ enum {
+ ConjLhs = _ConjLhs,
+ ConjRhs = _ConjRhs,
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+ nr = 4,
+ default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
+#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
+ mr = Vectorizable ? 3*LhsPacketSize : default_mr,
+#else
+ mr = default_mr,
+#endif
+ LhsProgress = LhsPacketSize,
+ RhsProgress = 1
+ };
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+ typedef ResPacket AccPacket;
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+ {
+ p = pset1<ResPacket>(ResScalar(0));
+ }
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ pbroadcast4(b, b0, b1, b2, b3);
+ }
+ template<typename RhsPacketType>
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const
+ {
+ dest = pset1<RhsPacketType>(*b);
+ }
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+ {
+ dest = ploadquad<RhsPacket>(b);
+ }
+ template<typename LhsPacketType>
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacketType& dest) const
+ {
+ dest = pload<LhsPacketType>(a);
+ }
+ template<typename LhsPacketType>
+ EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const
+ {
+ dest = ploadu<LhsPacketType>(a);
+ }
+ template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
+ EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const
+ {
+ conj_helper<LhsPacketType,RhsPacketType,ConjLhs,ConjRhs> cj;
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+ EIGEN_UNUSED_VARIABLE(tmp);
+ c = cj.pmadd(a,b,c);
+#else
+ tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp);
+#endif
+ }
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+ {
+ r = pmadd(c,alpha,r);
+ }
+ template<typename ResPacketHalf>
+ EIGEN_STRONG_INLINE void acc(const ResPacketHalf& c, const ResPacketHalf& alpha, ResPacketHalf& r) const
+ {
+ r = pmadd(c,alpha,r);
+ }
+};
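+// Specialization for (complex lhs) x (real rhs): the rhs is a broadcast real
+// packet, and madd works directly on the underlying real packets (.v) of the
+// complex lhs and accumulator.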
+template<typename RealScalar, bool _ConjLhs>
+class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
+{
+public:
+ typedef std::complex<RealScalar> LhsScalar;
+ typedef RealScalar RhsScalar;
+ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ enum {
+ ConjLhs = _ConjLhs,
+ ConjRhs = false,
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+ nr = 4,
+#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
+ mr = 3*LhsPacketSize,
+#else
+ mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
+#endif
+ LhsProgress = LhsPacketSize,
+ RhsProgress = 1
+ };
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+ typedef ResPacket AccPacket;
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+ {
+ p = pset1<ResPacket>(ResScalar(0));
+ }
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
+ {
+ dest = pset1<RhsPacket>(*b);
+ }
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+ {
+ dest = pset1<RhsPacket>(*b);
+ }
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = pload<LhsPacket>(a);
+ }
+ EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = ploadu<LhsPacket>(a);
+ }
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ pbroadcast4(b, b0, b1, b2, b3);
+ }
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
+ {
+ madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
+ }
+ EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
+ {
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+ EIGEN_UNUSED_VARIABLE(tmp);
+ c.v = pmadd(a.v,b,c.v);
+#else
+ tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
+#endif
+ }
+ EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& , const false_type&) const
+ {
+ c += a * b;
+ }
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+ {
+ r = cj.pmadd(c,alpha,r);
+ }
+protected:
+ conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
+};
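+// For (complex) x (complex) products the accumulator keeps the contributions
+// of the real and imaginary rhs parts in two separate real packets, which are
+// only recombined in acc().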
+template<typename Packet>
+struct DoublePacket
+{
+ Packet first;
+ Packet second;
+};
+template<typename Packet>
+DoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)
+{
+ DoublePacket<Packet> res;
+ res.first = padd(a.first, b.first);
+ res.second = padd(a.second,b.second);
+ return res;
+}
+template<typename Packet>
+const DoublePacket<Packet>& predux_downto4(const DoublePacket<Packet> &a)
+{
+ return a;
+}
+template<typename Packet> struct unpacket_traits<DoublePacket<Packet> > { typedef DoublePacket<Packet> half; };
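+// Specialization for (complex lhs) x (complex rhs): the rhs is broadcast as a
+// DoublePacket of real and imaginary parts, and acc() recombines the two
+// partial accumulators with pcplxflip according to the conjugation flags.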
+template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
+class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
+{
+public:
+ typedef std::complex<RealScalar> Scalar;
+ typedef std::complex<RealScalar> LhsScalar;
+ typedef std::complex<RealScalar> RhsScalar;
+ typedef std::complex<RealScalar> ResScalar;
+ enum {
+ ConjLhs = _ConjLhs,
+ ConjRhs = _ConjRhs,
+ Vectorizable = packet_traits<RealScalar>::Vectorizable
+ && packet_traits<Scalar>::Vectorizable,
+ RealPacketSize = Vectorizable ? packet_traits<RealScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ nr = 4,
+ mr = ResPacketSize,
+ LhsProgress = ResPacketSize,
+ RhsProgress = 1
+ };
+ typedef typename packet_traits<RealScalar>::type RealPacket;
+ typedef typename packet_traits<Scalar>::type ScalarPacket;
+ typedef DoublePacket<RealPacket> DoublePacketType;
+ typedef typename conditional<Vectorizable,RealPacket, Scalar>::type LhsPacket;
+ typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type RhsPacket;
+ typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type ResPacket;
+ typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type AccPacket;
+ EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
+ EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p)
+ {
+ p.first = pset1<RealPacket>(RealScalar(0));
+ p.second = pset1<RealPacket>(RealScalar(0));
+ }
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ResPacket& dest) const
+ {
+ dest = pset1<ResPacket>(*b);
+ }
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacketType& dest) const
+ {
+ dest.first = pset1<RealPacket>(real(*b));
+ dest.second = pset1<RealPacket>(imag(*b));
+ }
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const
+ {
+ loadRhs(b,dest);
+ }
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacketType& dest) const
+ {
+ eigen_internal_assert(unpacket_traits<ScalarPacket>::size<=4);
+ loadRhs(b,dest);
+ }
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ loadRhs(b+0, b0);
+ loadRhs(b+1, b1);
+ loadRhs(b+2, b2);
+ loadRhs(b+3, b3);
+ }
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1)
+ {
+ loadRhs(b+0, b0);
+ loadRhs(b+1, b1);
+ }
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
+ {
+ loadRhs(b+0, b0);
+ loadRhs(b+1, b1);
+ }
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
+ }
+ EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = ploadu<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
+ }
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacketType& c, RhsPacket& ) const
+ {
+ c.first = padd(pmul(a,b.first), c.first);
+ c.second = padd(pmul(a,b.second),c.second);
+ }
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& ) const
+ {
+ c = cj.pmadd(a,b,c);
+ }
+ EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; }
+ EIGEN_STRONG_INLINE void acc(const DoublePacketType& c, const ResPacket& alpha, ResPacket& r) const
+ {
+ ResPacket tmp;
+ if((!ConjLhs)&&(!ConjRhs))
+ {
+ tmp = pcplxflip(pconj(ResPacket(c.second)));
+ tmp = padd(ResPacket(c.first),tmp);
+ }
+ else if((!ConjLhs)&&(ConjRhs))
+ {
+ tmp = pconj(pcplxflip(ResPacket(c.second)));
+ tmp = padd(ResPacket(c.first),tmp);
+ }
+ else if((ConjLhs)&&(!ConjRhs))
+ {
+ tmp = pcplxflip(ResPacket(c.second));
+ tmp = padd(pconj(ResPacket(c.first)),tmp);
+ }
+ else if((ConjLhs)&&(ConjRhs))
+ {
+ tmp = pcplxflip(ResPacket(c.second));
+ tmp = psub(pconj(ResPacket(c.first)),tmp);
+ }
+ r = pmadd(tmp,alpha,r);
+ }
+protected:
+ conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
+};
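+// Specialization for (real lhs) x (complex rhs): lhs values are duplicated
+// with ploaddup so each real entry covers both the real and imaginary lanes
+// of a complex rhs packet.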
+template<typename RealScalar, bool _ConjRhs>
+class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
+{
+public:
+ typedef std::complex<RealScalar> Scalar;
+ typedef RealScalar LhsScalar;
+ typedef Scalar RhsScalar;
+ typedef Scalar ResScalar;
+ enum {
+ ConjLhs = false,
+ ConjRhs = _ConjRhs,
+ Vectorizable = packet_traits<RealScalar>::Vectorizable
+ && packet_traits<Scalar>::Vectorizable,
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+ nr = 4,
+ mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize,
+ LhsProgress = ResPacketSize,
+ RhsProgress = 1
+ };
+ typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+ typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+ typedef typename packet_traits<ResScalar>::type _ResPacket;
+ typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+ typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+ typedef ResPacket AccPacket;
+ EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+ {
+ p = pset1<ResPacket>(ResScalar(0));
+ }
+ EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
+ {
+ dest = pset1<RhsPacket>(*b);
+ }
+ void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ pbroadcast4(b, b0, b1, b2, b3);
+ }
+ EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = ploaddup<LhsPacket>(a);
+ }
+ EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+ {
+ eigen_internal_assert(unpacket_traits<RhsPacket>::size<=4);
+ loadRhs(b,dest);
+ }
+ EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+ {
+ dest = ploaddup<LhsPacket>(a);
+ }
+ EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
+ {
+ madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
+ }
+ EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
+ {
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+ EIGEN_UNUSED_VARIABLE(tmp);
+ c.v = pmadd(a,b.v,c.v);
+#else
+ tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
+#endif
+ }
+ EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& , const false_type&) const
+ {
+ c += a * b;
+ }
+ EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+ {
+ r = cj.pmadd(alpha,c,r);
+ }
+protected:
+ conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
+};
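+// gebp_kernel: the block-panel kernel computing res += alpha * blockA * blockB
+// on packed blocks. Rows are handled in peeled tiers of 3, 2 and 1 times
+// LhsProgress packet rows plus a scalar tail; columns are handled nr (=4) at a
+// time plus a one-column tail; the depth loop is peeled by pk (=8) steps.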
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+struct gebp_kernel
+{
+ typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
+ typedef typename Traits::ResScalar ResScalar;
+ typedef typename Traits::LhsPacket LhsPacket;
+ typedef typename Traits::RhsPacket RhsPacket;
+ typedef typename Traits::ResPacket ResPacket;
+ typedef typename Traits::AccPacket AccPacket;
+ typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs> SwappedTraits;
+ typedef typename SwappedTraits::ResScalar SResScalar;
+ typedef typename SwappedTraits::LhsPacket SLhsPacket;
+ typedef typename SwappedTraits::RhsPacket SRhsPacket;
+ typedef typename SwappedTraits::ResPacket SResPacket;
+ typedef typename SwappedTraits::AccPacket SAccPacket;
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ enum {
+ Vectorizable = Traits::Vectorizable,
+ LhsProgress = Traits::LhsProgress,
+ RhsProgress = Traits::RhsProgress,
+ ResPacketSize = Traits::ResPacketSize
+ };
+ EIGEN_DONT_INLINE
+ void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
+ Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);
+};
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,ConjugateRhs>
+ ::operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
+ Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB)
+ {
+ Traits traits;
+ SwappedTraits straits;
+ if(strideA==-1) strideA = depth;
+ if(strideB==-1) strideB = depth;
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+ const Index peeled_mc3 = mr>=3*Traits::LhsProgress ? (rows/(3*LhsProgress))*(3*LhsProgress) : 0;
+ const Index peeled_mc2 = mr>=2*Traits::LhsProgress ? peeled_mc3+((rows-peeled_mc3)/(2*LhsProgress))*(2*LhsProgress) : 0;
+ const Index peeled_mc1 = mr>=1*Traits::LhsProgress ? (rows/(1*LhsProgress))*(1*LhsProgress) : 0;
+ enum { pk = 8 };
+ const Index peeled_kc = depth & ~(pk-1);
+ const Index prefetch_res_offset = 32/sizeof(ResScalar);
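+ // First tier: panels of 3*LhsProgress rows by 4 columns. actual_panel_rows
+ // limits the panel height so its working set stays within the L1 cache.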
+ if(mr>=3*Traits::LhsProgress)
+ {
+ const Index l1 = defaultL1CacheSize;
+ const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
+ for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
+ {
+ const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C1, C2, C3,
+ C4, C5, C6, C7,
+ C8, C9, C10, C11;
+ traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
+ traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
+ traits.initAcc(C8); traits.initAcc(C9); traits.initAcc(C10); traits.initAcc(C11);
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+ r0.prefetch(0);
+ r1.prefetch(0);
+ r2.prefetch(0);
+ r3.prefetch(0);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+ LhsPacket A0, A1;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
+ RhsPacket B_0, T0;
+ LhsPacket A2;
+#define EIGEN_GEBP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ internal::prefetch(blA+(3*K+16)*LhsProgress); \
+ if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } \
+ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
+ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
+ traits.loadRhs(blB + (0+4*K)*Traits::RhsProgress, B_0); \
+ traits.madd(A0, B_0, C0, T0); \
+ traits.madd(A1, B_0, C4, T0); \
+ traits.madd(A2, B_0, C8, B_0); \
+ traits.loadRhs(blB + (1+4*K)*Traits::RhsProgress, B_0); \
+ traits.madd(A0, B_0, C1, T0); \
+ traits.madd(A1, B_0, C5, T0); \
+ traits.madd(A2, B_0, C9, B_0); \
+ traits.loadRhs(blB + (2+4*K)*Traits::RhsProgress, B_0); \
+ traits.madd(A0, B_0, C2, T0); \
+ traits.madd(A1, B_0, C6, T0); \
+ traits.madd(A2, B_0, C10, B_0); \
+ traits.loadRhs(blB + (3+4*K)*Traits::RhsProgress, B_0); \
+ traits.madd(A0, B_0, C3 , T0); \
+ traits.madd(A1, B_0, C7, T0); \
+ traits.madd(A2, B_0, C11, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
+ } while(false)
+ internal::prefetch(blB);
+ EIGEN_GEBP_ONESTEP(0);
+ EIGEN_GEBP_ONESTEP(1);
+ EIGEN_GEBP_ONESTEP(2);
+ EIGEN_GEBP_ONESTEP(3);
+ EIGEN_GEBP_ONESTEP(4);
+ EIGEN_GEBP_ONESTEP(5);
+ EIGEN_GEBP_ONESTEP(6);
+ EIGEN_GEBP_ONESTEP(7);
+ blB += pk*4*RhsProgress;
+ blA += pk*3*Traits::LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 3pX4");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, T0;
+ LhsPacket A2;
+ EIGEN_GEBP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 3*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBP_ONESTEP
+ ResPacket R0, R1, R2;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C8, alphav, R2);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r0.storePacket(2 * Traits::ResPacketSize, R2);
+ R0 = r1.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r1.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r1.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C1, alphav, R0);
+ traits.acc(C5, alphav, R1);
+ traits.acc(C9, alphav, R2);
+ r1.storePacket(0 * Traits::ResPacketSize, R0);
+ r1.storePacket(1 * Traits::ResPacketSize, R1);
+ r1.storePacket(2 * Traits::ResPacketSize, R2);
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r2.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r2.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C6, alphav, R1);
+ traits.acc(C10, alphav, R2);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r2.storePacket(1 * Traits::ResPacketSize, R1);
+ r2.storePacket(2 * Traits::ResPacketSize, R2);
+ R0 = r3.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r3.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r3.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C3, alphav, R0);
+ traits.acc(C7, alphav, R1);
+ traits.acc(C11, alphav, R2);
+ r3.storePacket(0 * Traits::ResPacketSize, R0);
+ r3.storePacket(1 * Traits::ResPacketSize, R1);
+ r3.storePacket(2 * Traits::ResPacketSize, R2);
+ }
+ }
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C4, C8;
+ traits.initAcc(C0);
+ traits.initAcc(C4);
+ traits.initAcc(C8);
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ r0.prefetch(0);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ LhsPacket A0, A1, A2;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX1");
+ RhsPacket B_0;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
+ traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A1, B_0, C4, B_0); \
+ traits.madd(A2, B_0, C8, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \
+ } while(false)
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*RhsProgress;
+ blA += pk*3*Traits::LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 3*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1, R2;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C8, alphav, R2);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r0.storePacket(2 * Traits::ResPacketSize, R2);
+ }
+ }
+ }
+ }
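+ // Second tier: panels of 2*LhsProgress rows by 4 columns, same structure as above.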
+ if(mr>=2*Traits::LhsProgress)
+ {
+ const Index l1 = defaultL1CacheSize;
+ Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
+ for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
+ {
+ Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C1, C2, C3,
+ C4, C5, C6, C7;
+ traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
+ traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+ r0.prefetch(prefetch_res_offset);
+ r1.prefetch(prefetch_res_offset);
+ r2.prefetch(prefetch_res_offset);
+ r3.prefetch(prefetch_res_offset);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+ LhsPacket A0, A1;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
+ RhsPacket B_0, B1, B2, B3, T0;
+ #define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, T0); \
+ traits.madd(A1, B_0, C4, B_0); \
+ traits.madd(A0, B1, C1, T0); \
+ traits.madd(A1, B1, C5, B1); \
+ traits.madd(A0, B2, C2, T0); \
+ traits.madd(A1, B2, C6, B2); \
+ traits.madd(A0, B3, C3, T0); \
+ traits.madd(A1, B3, C7, B3); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
+ } while(false)
+ internal::prefetch(blB+(48+0));
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ internal::prefetch(blB+(48+16));
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*4*RhsProgress;
+ blA += pk*(2*Traits::LhsProgress);
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX4");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1, B2, B3, T0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 2*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1, R2, R3;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r1.loadPacket(0 * Traits::ResPacketSize);
+ R3 = r1.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ traits.acc(C1, alphav, R2);
+ traits.acc(C5, alphav, R3);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ r1.storePacket(0 * Traits::ResPacketSize, R2);
+ r1.storePacket(1 * Traits::ResPacketSize, R3);
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r2.loadPacket(1 * Traits::ResPacketSize);
+ R2 = r3.loadPacket(0 * Traits::ResPacketSize);
+ R3 = r3.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C6, alphav, R1);
+ traits.acc(C3, alphav, R2);
+ traits.acc(C7, alphav, R3);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r2.storePacket(1 * Traits::ResPacketSize, R1);
+ r3.storePacket(0 * Traits::ResPacketSize, R2);
+ r3.storePacket(1 * Traits::ResPacketSize, R3);
+ }
+ }
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C4;
+ traits.initAcc(C0);
+ traits.initAcc(C4);
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ r0.prefetch(prefetch_res_offset);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ LhsPacket A0, A1;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
+ RhsPacket B_0, B1;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
+ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B1); \
+ traits.madd(A1, B_0, C4, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
+ } while(false)
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*RhsProgress;
+ blA += pk*2*Traits::LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 2*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C4, alphav, R1);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r0.storePacket(1 * Traits::ResPacketSize, R1);
+ }
+ }
+ }
+ }
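+ // Third tier: single packet rows (1*LhsProgress) by 4 columns.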
+ if(mr>=1*Traits::LhsProgress)
+ {
+ for(Index i=peeled_mc2; i<peeled_mc1; i+=1*LhsProgress)
+ {
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0, C1, C2, C3;
+ traits.initAcc(C0);
+ traits.initAcc(C1);
+ traits.initAcc(C2);
+ traits.initAcc(C3);
+ LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+ LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+ LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+ LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+ r0.prefetch(prefetch_res_offset);
+ r1.prefetch(prefetch_res_offset);
+ r2.prefetch(prefetch_res_offset);
+ r3.prefetch(prefetch_res_offset);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ prefetch(&blB[0]);
+ LhsPacket A0;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX4");
+ RhsPacket B_0, B1, B2, B3;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX4"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A0, B1, C1, B1); \
+ traits.madd(A0, B2, C2, B2); \
+ traits.madd(A0, B3, C3, B3); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX4"); \
+ } while(false)
+ internal::prefetch(blB+(48+0));
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ internal::prefetch(blB+(48+16));
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*4*RhsProgress;
+ blA += pk*1*LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 1pX4");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0, B1, B2, B3;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += 4*RhsProgress;
+ blA += 1*LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0, R1;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r1.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ traits.acc(C1, alphav, R1);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ r1.storePacket(0 * Traits::ResPacketSize, R1);
+ R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+ R1 = r3.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C2, alphav, R0);
+ traits.acc(C3, alphav, R1);
+ r2.storePacket(0 * Traits::ResPacketSize, R0);
+ r3.storePacket(0 * Traits::ResPacketSize, R1);
+ }
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
+ prefetch(&blA[0]);
+ AccPacket C0;
+ traits.initAcc(C0);
+ LinearMapper r0 = res.getLinearMapper(i, j2);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ LhsPacket A0;
+ for(Index k=0; k<peeled_kc; k+=pk)
+ {
+ EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX1");
+ RhsPacket B_0;
+#define EIGEN_GEBGP_ONESTEP(K) \
+ do { \
+ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1"); \
+ EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
+ traits.madd(A0, B_0, C0, B_0); \
+ EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1"); \
+ } while(false)
+ EIGEN_GEBGP_ONESTEP(0);
+ EIGEN_GEBGP_ONESTEP(1);
+ EIGEN_GEBGP_ONESTEP(2);
+ EIGEN_GEBGP_ONESTEP(3);
+ EIGEN_GEBGP_ONESTEP(4);
+ EIGEN_GEBGP_ONESTEP(5);
+ EIGEN_GEBGP_ONESTEP(6);
+ EIGEN_GEBGP_ONESTEP(7);
+ blB += pk*RhsProgress;
+ blA += pk*1*Traits::LhsProgress;
+ EIGEN_ASM_COMMENT("end gebp micro kernel 1pX1");
+ }
+ for(Index k=peeled_kc; k<depth; k++)
+ {
+ RhsPacket B_0;
+ EIGEN_GEBGP_ONESTEP(0);
+ blB += RhsProgress;
+ blA += 1*Traits::LhsProgress;
+ }
+#undef EIGEN_GEBGP_ONESTEP
+ ResPacket R0;
+ ResPacket alphav = pset1<ResPacket>(alpha);
+ R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ traits.acc(C0, alphav, R0);
+ r0.storePacket(0 * Traits::ResPacketSize, R0);
+ }
+ }
+ }
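+ // Scalar tail: remaining rows are handled one at a time. When possible the
+ // roles of lhs and rhs are swapped (SwappedTraits) so a single row can still
+ // be vectorized; otherwise a plain scalar loop with CJMADD is used.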
+ if(peeled_mc1<rows)
+ {
+ for(Index j2=0; j2<packet_cols4; j2+=nr)
+ {
+ for(Index i=peeled_mc1; i<rows; i+=1)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA];
+ prefetch(&blA[0]);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+ typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
+ if ((SwappedTraits::LhsProgress % 4) == 0 &&
+ (SwappedTraits::LhsProgress <= 8) &&
+ (SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
+ {
+ SAccPacket C0, C1, C2, C3;
+ straits.initAcc(C0);
+ straits.initAcc(C1);
+ straits.initAcc(C2);
+ straits.initAcc(C3);
+ const Index spk = (std::max)(1,SwappedTraits::LhsProgress/4);
+ const Index endk = (depth/spk)*spk;
+ const Index endk4 = (depth/(spk*4))*(spk*4);
+ Index k=0;
+ for(; k<endk4; k+=4*spk)
+ {
+ SLhsPacket A0,A1;
+ SRhsPacket B_0,B_1;
+ straits.loadLhsUnaligned(blB+0*SwappedTraits::LhsProgress, A0);
+ straits.loadLhsUnaligned(blB+1*SwappedTraits::LhsProgress, A1);
+ straits.loadRhsQuad(blA+0*spk, B_0);
+ straits.loadRhsQuad(blA+1*spk, B_1);
+ straits.madd(A0,B_0,C0,B_0);
+ straits.madd(A1,B_1,C1,B_1);
+ straits.loadLhsUnaligned(blB+2*SwappedTraits::LhsProgress, A0);
+ straits.loadLhsUnaligned(blB+3*SwappedTraits::LhsProgress, A1);
+ straits.loadRhsQuad(blA+2*spk, B_0);
+ straits.loadRhsQuad(blA+3*spk, B_1);
+ straits.madd(A0,B_0,C2,B_0);
+ straits.madd(A1,B_1,C3,B_1);
+ blB += 4*SwappedTraits::LhsProgress;
+ blA += 4*spk;
+ }
+ C0 = padd(padd(C0,C1),padd(C2,C3));
+ for(; k<endk; k+=spk)
+ {
+ SLhsPacket A0;
+ SRhsPacket B_0;
+ straits.loadLhsUnaligned(blB, A0);
+ straits.loadRhsQuad(blA, B_0);
+ straits.madd(A0,B_0,C0,B_0);
+ blB += SwappedTraits::LhsProgress;
+ blA += spk;
+ }
+ if(SwappedTraits::LhsProgress==8)
+ {
+ typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SRhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
+ typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
+ SResPacketHalf R = res.template gatherPacket<SResPacketHalf>(i, j2);
+ SResPacketHalf alphav = pset1<SResPacketHalf>(alpha);
+ if(depth-endk>0)
+ {
+ SLhsPacketHalf a0;
+ SRhsPacketHalf b0;
+ straits.loadLhsUnaligned(blB, a0);
+ straits.loadRhs(blA, b0);
+ SAccPacketHalf c0 = predux_downto4(C0);
+ straits.madd(a0,b0,c0,b0);
+ straits.acc(c0, alphav, R);
+ }
+ else
+ {
+ straits.acc(predux_downto4(C0), alphav, R);
+ }
+ res.scatterPacket(i, j2, R);
+ }
+ else
+ {
+ SResPacket R = res.template gatherPacket<SResPacket>(i, j2);
+ SResPacket alphav = pset1<SResPacket>(alpha);
+ straits.acc(C0, alphav, R);
+ res.scatterPacket(i, j2, R);
+ }
+ }
+ else
+ {
+ ResScalar C0(0), C1(0), C2(0), C3(0);
+ for(Index k=0; k<depth; k++)
+ {
+ LhsScalar A0;
+ RhsScalar B_0, B_1;
+ A0 = blA[k];
+ B_0 = blB[0];
+ B_1 = blB[1];
+ CJMADD(cj,A0,B_0,C0, B_0);
+ CJMADD(cj,A0,B_1,C1, B_1);
+ B_0 = blB[2];
+ B_1 = blB[3];
+ CJMADD(cj,A0,B_0,C2, B_0);
+ CJMADD(cj,A0,B_1,C3, B_1);
+ blB += 4;
+ }
+ res(i, j2 + 0) += alpha * C0;
+ res(i, j2 + 1) += alpha * C1;
+ res(i, j2 + 2) += alpha * C2;
+ res(i, j2 + 3) += alpha * C3;
+ }
+ }
+ }
+ for(Index j2=packet_cols4; j2<cols; j2++)
+ {
+ for(Index i=peeled_mc1; i<rows; i+=1)
+ {
+ const LhsScalar* blA = &blockA[i*strideA+offsetA];
+ prefetch(&blA[0]);
+ ResScalar C0(0);
+ const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+ for(Index k=0; k<depth; k++)
+ {
+ LhsScalar A0 = blA[k];
+ RhsScalar B_0 = blB[k];
+ CJMADD(cj, A0, B_0, C0, B_0);
+ }
+ res(i, j2) += alpha * C0;
+ }
+ }
+ }
+ }
+#undef CJMADD
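+// gemm_pack_lhs (column-major source): copies a rows x depth lhs block into
+// blockA in the interleaved order expected by gebp_kernel, packet by packet
+// for the 3x/2x/1x PacketSize row tiers, optionally conjugating. In PanelMode
+// each panel is padded out to the given stride/offset.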
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+{
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ eigen_assert( ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) || (Pack1<=4) );
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index count = 0;
+ const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
+ const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
+ const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
+ const Index peeled_mc0 = Pack2>=1*PacketSize ? peeled_mc1
+ : Pack2>1 ? (rows/Pack2)*Pack2 : 0;
+ Index i=0;
+ if(Pack1>=3*PacketSize)
+ {
+ for(; i<peeled_mc3; i+=3*PacketSize)
+ {
+ if(PanelMode) count += (3*PacketSize) * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A, B, C;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ B = lhs.loadPacket(i+1*PacketSize, k);
+ C = lhs.loadPacket(i+2*PacketSize, k);
+ pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
+ pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
+ pstore(blockA+count, cj.pconj(C)); count+=PacketSize;
+ }
+ if(PanelMode) count += (3*PacketSize) * (stride-offset-depth);
+ }
+ }
+ if(Pack1>=2*PacketSize)
+ {
+ for(; i<peeled_mc2; i+=2*PacketSize)
+ {
+ if(PanelMode) count += (2*PacketSize) * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A, B;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ B = lhs.loadPacket(i+1*PacketSize, k);
+ pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
+ pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
+ }
+ if(PanelMode) count += (2*PacketSize) * (stride-offset-depth);
+ }
+ }
+ if(Pack1>=1*PacketSize)
+ {
+ for(; i<peeled_mc1; i+=1*PacketSize)
+ {
+ if(PanelMode) count += (1*PacketSize) * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ Packet A;
+ A = lhs.loadPacket(i+0*PacketSize, k);
+ pstore(blockA+count, cj.pconj(A));
+ count+=PacketSize;
+ }
+ if(PanelMode) count += (1*PacketSize) * (stride-offset-depth);
+ }
+ }
+ if(Pack2<PacketSize && Pack2>1)
+ {
+ for(; i<peeled_mc0; i+=Pack2)
+ {
+ if(PanelMode) count += Pack2 * offset;
+ for(Index k=0; k<depth; k++)
+ for(Index w=0; w<Pack2; w++)
+ blockA[count++] = cj(lhs(i+w, k));
+ if(PanelMode) count += Pack2 * (stride-offset-depth);
+ }
+ }
+ for(; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
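+// Row-major lhs packing: the same target layout, produced by transposing
+// PacketSize x PacketSize tiles with ptranspose and falling back to scalar
+// copies for the leftover rows.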
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+{
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index count = 0;
+ int pack = Pack1;
+ Index i = 0;
+ while(pack>0)
+ {
+ Index remaining_rows = rows-i;
+ Index peeled_mc = i+(remaining_rows/pack)*pack;
+ for(; i<peeled_mc; i+=pack)
+ {
+ if(PanelMode) count += pack * offset;
+ const Index peeled_k = (depth/PacketSize)*PacketSize;
+ Index k=0;
+ if(pack>=PacketSize)
+ {
+ for(; k<peeled_k; k+=PacketSize)
+ {
+ for (Index m = 0; m < pack; m += PacketSize)
+ {
+ PacketBlock<Packet> kernel;
+ for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.loadPacket(i+p+m, k);
+ ptranspose(kernel);
+ for (int p = 0; p < PacketSize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p]));
+ }
+ count += PacketSize*pack;
+ }
+ }
+ for(; k<depth; k++)
+ {
+ Index w=0;
+ for(; w<pack-3; w+=4)
+ {
+ Scalar a(cj(lhs(i+w+0, k))),
+ b(cj(lhs(i+w+1, k))),
+ c(cj(lhs(i+w+2, k))),
+ d(cj(lhs(i+w+3, k)));
+ blockA[count++] = a;
+ blockA[count++] = b;
+ blockA[count++] = c;
+ blockA[count++] = d;
+ }
+ if(pack%4)
+ for(;w<pack;++w)
+ blockA[count++] = cj(lhs(i+w, k));
+ }
+ if(PanelMode) count += pack * (stride-offset-depth);
+ }
+ pack -= PacketSize;
+ if(pack<Pack2 && (pack+PacketSize)!=Pack2)
+ pack = Pack2;
+ }
+ for(; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
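+// gemm_pack_rhs (column-major source): packs depth x cols rhs panels four
+// columns at a time, transposing PacketSize-wide tiles so that gebp_kernel
+// can read them linearly.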
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+ Index count = 0;
+ const Index peeled_k = (depth/PacketSize)*PacketSize;
+ if(nr>=4)
+ {
+ for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+ {
+ if(PanelMode) count += 4 * offset;
+ const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
+ const LinearMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
+ const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
+ const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
+ Index k=0;
+ if((PacketSize%4)==0)
+ {
+ for(; k<peeled_k; k+=PacketSize) {
+ PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
+ kernel.packet[0] = dm0.loadPacket(k);
+ kernel.packet[1%PacketSize] = dm1.loadPacket(k);
+ kernel.packet[2%PacketSize] = dm2.loadPacket(k);
+ kernel.packet[3%PacketSize] = dm3.loadPacket(k);
+ ptranspose(kernel);
+ pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));
+ pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
+ pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));
+ pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));
+ count+=4*PacketSize;
+ }
+ }
+ for(; k<depth; k++)
+ {
+ blockB[count+0] = cj(dm0(k));
+ blockB[count+1] = cj(dm1(k));
+ blockB[count+2] = cj(dm2(k));
+ blockB[count+3] = cj(dm3(k));
+ count += 4;
+ }
+ if(PanelMode) count += 4 * (stride-offset-depth);
+ }
+ }
+ for(Index j2=packet_cols4; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const LinearMapper dm0 = rhs.getLinearMapper(0, j2);
+ for(Index k=0; k<depth; k++)
+ {
+ blockB[count] = cj(dm0(k));
+ count += 1;
+ }
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
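+// Row-major rhs packing: four consecutive columns of one row form a single
+// packet load when PacketSize==4; otherwise they are copied element-wise.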
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ typedef typename DataMapper::LinearMapper LinearMapper;
+ enum { PacketSize = packet_traits<Scalar>::size };
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(offset);
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+ Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+ Index count = 0;
+ if(nr>=4)
+ {
+ for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+ {
+ if(PanelMode) count += 4 * offset;
+ for(Index k=0; k<depth; k++)
+ {
+ if (PacketSize==4) {
+ Packet A = rhs.loadPacket(k, j2);
+ pstoreu(blockB+count, cj.pconj(A));
+ count += PacketSize;
+ } else {
+ const LinearMapper dm0 = rhs.getLinearMapper(k, j2);
+ blockB[count+0] = cj(dm0(0));
+ blockB[count+1] = cj(dm0(1));
+ blockB[count+2] = cj(dm0(2));
+ blockB[count+3] = cj(dm0(3));
+ count += 4;
+ }
+ }
+ if(PanelMode) count += 4 * (stride-offset-depth);
+ }
+ }
+ for(Index j2=packet_cols4; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ {
+ blockB[count] = cj(rhs(k, j2));
+ count += 1;
+ }
+ if(PanelMode) count += stride-offset-depth;
+ }
+}
+}
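+// Public accessors for the cache sizes driving the blocking heuristics. They
+// can be overridden at runtime, e.g. (example values only):
+//   Eigen::setCpuCacheSizes(32*1024, 256*1024, 8*1024*1024);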
+inline std::ptrdiff_t l1CacheSize()
+{
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ return l1;
+}
+inline std::ptrdiff_t l2CacheSize()
+{
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ return l2;
+}
+inline std::ptrdiff_t l3CacheSize()
+{
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ return l3;
+}
+inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2, std::ptrdiff_t l3)
+{
+ internal::manage_caching_sizes(SetAction, &l1, &l2, &l3);
+}
+}
+#endif
+// end #include "src/Core/products/GeneralBlockPanelKernel.h"
+// #include "src/Core/products/Parallelizer.h"
+#ifndef EIGEN_PARALLELIZER_H
+#define EIGEN_PARALLELIZER_H
+#include <atomic>
+namespace Eigen {
+namespace internal {
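+// Holds the user-requested maximum thread count. With OpenMP, a non-positive
+// setting defers to omp_get_max_threads(); without OpenMP the count is 1.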
+inline void manage_multi_threading(Action action, int* v)
+{
+ static EIGEN_UNUSED int m_maxThreads = -1;
+ if(action==SetAction)
+ {
+ eigen_internal_assert(v!=0);
+ m_maxThreads = *v;
+ }
+ else if(action==GetAction)
+ {
+ eigen_internal_assert(v!=0);
+ #ifdef EIGEN_HAS_OPENMP
+ if(m_maxThreads>0)
+ *v = m_maxThreads;
+ else
+ *v = omp_get_max_threads();
+ #else
+ *v = 1;
+ #endif
+ }
+ else
+ {
+ eigen_internal_assert(false);
+ }
+}
+}
+inline void initParallel()
+{
+ int nbt;
+ internal::manage_multi_threading(GetAction, &nbt);
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+}
+inline int nbThreads()
+{
+ int ret;
+ internal::manage_multi_threading(GetAction, &ret);
+ return ret;
+}
+inline void setNbThreads(int v)
+{
+ internal::manage_multi_threading(SetAction, &v);
+}
+namespace internal {
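+// Per-thread bookkeeping for the parallel GEMM: each worker owns the lhs rows
+// [lhs_start, lhs_start+lhs_length); the atomic sync/users fields coordinate
+// sharing of packed lhs blocks between threads.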
+template<typename Index> struct GemmParallelInfo
+{
+ GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
+ std::atomic<Index> sync;
+ std::atomic<int> users;
+ Index lhs_start;
+ Index lhs_length;
+};
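+// Splits the product across OpenMP threads by rows/columns. Falls back to a
+// single-threaded call when OpenMP is unavailable (or EIGEN_USE_BLAS is
+// defined), when the problem offers fewer than ~50000 multiply-adds per
+// thread, or when already inside a parallel region.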
+template<bool Condition, typename Functor, typename Index>
+void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
+{
+#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
+ EIGEN_UNUSED_VARIABLE(depth);
+ EIGEN_UNUSED_VARIABLE(transpose);
+ func(0,rows, 0,cols);
+#else
+ Index size = transpose ? rows : cols;
+ Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
+ double work = static_cast<double>(rows) * static_cast<double>(cols) *
+ static_cast<double>(depth);
+ double kMinTaskSize = 50000;
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
+ Index threads = std::min<Index>(nbThreads(), pb_max_threads);
+ if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
+ return func(0,rows, 0,cols);
+ Eigen::initParallel();
+ func.initParallelSession(threads);
+ if(transpose)
+ std::swap(rows,cols);
+ ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
+ #pragma omp parallel num_threads(threads)
+ {
+ Index i = omp_get_thread_num();
+ Index actual_threads = omp_get_num_threads();
+ Index blockCols = (cols / actual_threads) & ~Index(0x3);
+ Index blockRows = (rows / actual_threads);
+ blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
+ Index r0 = i*blockRows;
+ Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
+ Index c0 = i*blockCols;
+ Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
+ info[i].lhs_start = r0;
+ info[i].lhs_length = actualBlockRows;
+ if(transpose) func(c0, actualBlockCols, 0, rows, info);
+ else func(0, rows, c0, actualBlockCols, info);
+ }
+#endif
+}
+}
+}
+#endif
+// end #include "src/Core/products/Parallelizer.h"
+// #include "src/Core/ProductEvaluators.h"
+#ifndef EIGEN_PRODUCTEVALUATORS_H
+#define EIGEN_PRODUCTEVALUATORS_H
+namespace Eigen {
+namespace internal {
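+// Evaluators for Product expressions. The generic product_evaluator below
+// evaluates the product into a temporary PlainObject; the specializations
+// first peel off scalar factors and diagonal views so those can reuse the
+// lazy-product machinery.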
+template<typename Lhs, typename Rhs, int Options>
+struct evaluator<Product<Lhs, Rhs, Options> >
+ : public product_evaluator<Product<Lhs, Rhs, Options> >
+{
+ typedef Product<Lhs, Rhs, Options> XprType;
+ typedef product_evaluator<XprType> Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+ const Product<Lhs, Rhs, DefaultProduct> > >
+{
+ static const bool value = true;
+};
+template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
+struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+ const Product<Lhs, Rhs, DefaultProduct> > >
+ : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
+{
+ typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+ const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+ const Product<Lhs, Rhs, DefaultProduct> > XprType;
+ typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
+ {}
+};
+template<typename Lhs, typename Rhs, int DiagIndex>
+struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
+ : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
+{
+ typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
+ typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
+ Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
+ xpr.index() ))
+ {}
+};
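+// generic_product_impl is the central dispatcher: it is specialized on the
+// operand shapes (dense, triangular, selfadjoint, diagonal, permutation, ...)
+// and on the selected product kind (inner, outer, gemv, gemm, coeff-based).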
+template< typename Lhs, typename Rhs,
+ typename LhsShape = typename evaluator_traits<Lhs>::Shape,
+ typename RhsShape = typename evaluator_traits<Rhs>::Shape,
+ int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct generic_product_impl;
+template<typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
+ static const bool value = true;
+};
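+// Default case: evaluate the product into a temporary PlainObject through
+// generic_product_impl and expose an evaluator over that result.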
+template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
+struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
+ : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
+{
+ typedef Product<Lhs, Rhs, Options> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ typedef evaluator<PlainObject> Base;
+ enum {
+ Flags = Base::Flags | EvalBeforeNestingBit
+ };
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit product_evaluator(const XprType& xpr)
+ : m_result(xpr.rows(), xpr.cols())
+ {
+ ::new (static_cast<Base*>(this)) Base(m_result);
+ generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+ }
+protected:
+ PlainObject m_result;
+};
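+// Assignment specializations route "dst = A*B", "dst += A*B" and "dst -= A*B"
+// directly to evalTo/addTo/subTo, avoiding an extra temporary.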
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
+ typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+ typedef Product<Lhs,Rhs,Options> SrcXprType;
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
+ {
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
+ }
+};
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
+ typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+ typedef Product<Lhs,Rhs,Options> SrcXprType;
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
+ {
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
+ }
+};
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
+ typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+ typedef Product<Lhs,Rhs,Options> SrcXprType;
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
+ {
+ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+ generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
+ }
+};
+template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
+struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
+ const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
+{
+ typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
+ const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
+ const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
+ {
+ call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
+ }
+};
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
+ const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+ static const bool value = true;
+};
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
+ const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+ static const bool value = true;
+};
+template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
+struct assignment_from_xpr_op_product
+{
+ template<typename SrcXprType, typename InitialFunc>
+ static EIGEN_STRONG_INLINE
+ void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& )
+ {
+ call_assignment_no_alias(dst, src.lhs(), Func1());
+ call_assignment_no_alias(dst, src.rhs(), Func2());
+ }
+};
+#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
+ template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
+ struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
+ const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
+ : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
+ {}
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
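+// Inner products yield a 1x1 result: evaluate them as a coefficient-wise
+// product followed by a sum, i.e. a plain dot product.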
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
+{
+ template<typename Dst>
+ static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+ }
+ template<typename Dst>
+ static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
+ }
+ template<typename Dst>
+ static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
+};
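+// Outer products update the destination one column (column-major dst) or one
+// row (row-major dst) at a time; the Func argument encodes =, +=, -= or a
+// scaled +=.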
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
+{
+ evaluator<Rhs> rhsEval(rhs);
+ typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
+ const Index cols = dst.cols();
+ for (Index j=0; j<cols; ++j)
+ func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
+}
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
+{
+ evaluator<Lhs> lhsEval(lhs);
+ typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
+ const Index rows = dst.rows();
+ for (Index i=0; i<rows; ++i)
+ func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
+}
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
+{
+ template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
+ struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+ struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+ struct adds {
+ Scalar m_scale;
+ explicit adds(const Scalar& s) : m_scale(s) {}
+ template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+ dst.const_cast_derived() += m_scale * src;
+ }
+ };
+ template<typename Dst>
+ static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
+ }
+ template<typename Dst>
+ static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
+ }
+ template<typename Dst>
+ static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
+ }
+ template<typename Dst>
+ static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
+ }
+};
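+// CRTP helper: a derived class only provides scaleAndAddTo(); evalTo, addTo
+// and subTo are expressed through it with alpha = +1 or -1.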
+template<typename Lhs, typename Rhs, typename Derived>
+struct generic_product_impl_base
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
+};
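+// Matrix * vector: dispatch on which operand is the vector and forward to
+// the dense GEMV selector, which picks a kernel from the matrix's storage
+// order and whether its data is directly accessible.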
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
+{
+ typedef typename nested_eval<Lhs,1>::type LhsNested;
+ typedef typename nested_eval<Rhs,1>::type RhsNested;
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+ typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
+ template<typename Dest>
+ static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ LhsNested actual_lhs(lhs);
+ RhsNested actual_rhs(rhs);
+ internal::gemv_dense_selector<Side,
+ (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+ bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
+ >::run(actual_lhs, actual_rhs, dst, alpha);
+ }
+};
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
+ }
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
+ }
+ template<typename Dst>
+ static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
+ }
+};
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
+ : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
+template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl;
+template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl;
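+// Lazy (coefficient-based) product evaluator: each coefficient is computed on
+// demand as the dot product of a lhs row with a rhs column; packet access is
+// served by the unrolled etor_product_packet_impl machinery below.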
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
+ : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
+{
+ typedef Product<Lhs, Rhs, LazyProduct> XprType;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit product_evaluator(const XprType& xpr)
+ : m_lhs(xpr.lhs()),
+ m_rhs(xpr.rhs()),
+ m_lhsImpl(m_lhs),
+ m_rhsImpl(m_rhs),
+ m_innerDim(xpr.lhs().cols())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+#if 0
+ std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
+ std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
+ std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
+ std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
+ std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
+ std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
+ std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
+ std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
+ std::cerr << "Alignment= " << Alignment << "\n";
+ std::cerr << "Flags= " << Flags << "\n";
+#endif
+ }
+ typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+ typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+ typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+ typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+ typedef evaluator<LhsNestedCleaned> LhsEtorType;
+ typedef evaluator<RhsNestedCleaned> RhsEtorType;
+ enum {
+ RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
+ ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
+ InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
+ MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
+ };
+ typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
+ typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
+ enum {
+ LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
+ RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
+ CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
+ : InnerSize == Dynamic ? HugeCost
+ : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
+ Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
+ LhsFlags = LhsEtorType::Flags,
+ RhsFlags = RhsEtorType::Flags,
+ LhsRowMajor = LhsFlags & RowMajorBit,
+ RhsRowMajor = RhsFlags & RowMajorBit,
+ LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
+ RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
+ LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
+ RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
+ SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
+ CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
+ CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),
+ EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+ : (bool(RhsRowMajor) && !CanVectorizeLhs),
+ Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
+ | (EvalToRowMajor ? RowMajorBit : 0)
+ | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
+ | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
+ LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
+ RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
+ Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
+ : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
+ : 0,
+ CanVectorizeInner = SameType
+ && LhsRowMajor
+ && (!RhsRowMajor)
+ && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+ && (InnerSize % packet_traits<Scalar>::size == 0)
+ };
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
+ {
+ return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
+ }
+ EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
+ {
+ const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
+ const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
+ return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
+ }
+ template<int LoadMode, typename PacketType>
+ const PacketType packet(Index row, Index col) const
+ {
+ PacketType res;
+ typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
+ Unroll ? int(InnerSize) : Dynamic,
+ LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
+ PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
+ return res;
+ }
+ template<int LoadMode, typename PacketType>
+ const PacketType packet(Index index) const
+ {
+ const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
+ const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
+ return packet<LoadMode,PacketType>(row,col);
+ }
+protected:
+ typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
+ typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
+ LhsEtorType m_lhsImpl;
+ RhsEtorType m_rhsImpl;
+ Index m_innerDim;
+};
+template<typename Lhs, typename Rhs>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
+ : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
+{
+ typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+ typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
+ typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
+ enum {
+ Flags = Base::Flags | EvalBeforeNestingBit
+ };
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
+ {}
+};
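+// etor_product_packet_impl recursion: each UnrollingIndex level accumulates
+// one inner-dimension term with pmadd; the 0 specializations produce a zero
+// packet and the Dynamic ones fall back to a runtime loop.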
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ {
+ etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+ res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
+ }
+};
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ {
+ etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+ res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index , Packet &res)
+ {
+ res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index , Packet &res)
+ {
+ res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index , Index , const Lhs& , const Rhs& , Index , Packet &res)
+ {
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index , Index , const Lhs& , const Rhs& , Index , Packet &res)
+ {
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ {
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+ for(Index i = 0; i < innerDim; ++i)
+ res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
+ }
+};
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ {
+ res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+ for(Index i = 0; i < innerDim; ++i)
+ res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
+ }
+};
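+// Triangular and selfadjoint operands are forwarded to their dedicated
+// kernels, unwrapping the structured side via nestedExpression().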
+template<int Mode, bool LhsIsTriangular,
+ typename Lhs, bool LhsIsVector,
+ typename Rhs, bool RhsIsVector>
+struct triangular_product_impl;
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
+ ::run(dst, lhs.nestedExpression(), rhs, alpha);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
+ }
+};
+template <typename Lhs, int LhsMode, bool LhsIsVector,
+ typename Rhs, int RhsMode, bool RhsIsVector>
+struct selfadjoint_product_impl;
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ {
+ selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
+ }
+};
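+// Diagonal products scale each matrix coefficient by the matching diagonal
+// entry; in the packet path the diagonal entry is either broadcast with pset1
+// or loaded as a packet, depending on storage order and product side.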
+template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
+struct diagonal_product_evaluator_base
+ : evaluator_base<Derived>
+{
+ typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
+public:
+ enum {
+ CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
+ MatrixFlags = evaluator<MatrixType>::Flags,
+ DiagFlags = evaluator<DiagonalType>::Flags,
+ _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
+ _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
+ ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
+ _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+ _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
+ _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
+ Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
+ Alignment = evaluator<MatrixType>::Alignment
+ };
+ diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
+ : m_diagImpl(diag), m_matImpl(mat)
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
+ {
+ return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
+ }
+protected:
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
+ {
+ return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
+ internal::pset1<PacketType>(m_diagImpl.coeff(id)));
+ }
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
+ {
+ enum {
+ InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
+ DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment))
+ };
+ return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
+ m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
+ }
+ evaluator<DiagonalType> m_diagImpl;
+ evaluator<MatrixType> m_matImpl;
+};
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
+ : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
+{
+ typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
+ using Base::m_diagImpl;
+ using Base::m_matImpl;
+ using Base::coeff;
+ typedef typename Base::Scalar Scalar;
+ typedef Product<Lhs, Rhs, ProductKind> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ enum {
+ StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+ };
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(xpr.rhs(), xpr.lhs().diagonal())
+ {
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+ {
+ return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
+ }
+#ifndef __CUDACC__
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
+ {
+ return this->template packet_impl<LoadMode,PacketType>(row,col, row,
+ typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
+ }
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet(Index idx) const
+ {
+ return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+ }
+#endif
+};
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
+ : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
+{
+ typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
+ using Base::m_diagImpl;
+ using Base::m_matImpl;
+ using Base::coeff;
+ typedef typename Base::Scalar Scalar;
+ typedef Product<Lhs, Rhs, ProductKind> XprType;
+ typedef typename XprType::PlainObject PlainObject;
+ enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
+ EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+ : Base(xpr.lhs(), xpr.rhs().diagonal())
+ {
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+ {
+ return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
+ }
+#ifndef __CUDACC__
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
+ {
+ return this->template packet_impl<LoadMode,PacketType>(row,col, col,
+ typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
+ }
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE PacketType packet(Index idx) const
+ {
+ return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+ }
+#endif
+};
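+// Permutation * dense: when source and destination alias, the permutation is
+// decomposed into cycles applied as row/column swaps; otherwise each
+// row/column is copied straight to its permuted position.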
+template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
+struct permutation_matrix_product;
+template<typename ExpressionType, int Side, bool Transposed>
+struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
+{
+ typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
+ typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
+ template<typename Dest, typename PermutationType>
+ static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
+ {
+ MatrixType mat(xpr);
+ const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
+ if(is_same_dense(dst, mat))
+ {
+ Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
+ mask.fill(false);
+ Index r = 0;
+ while(r < perm.size())
+ {
+ while(r<perm.size() && mask[r]) r++;
+ if(r>=perm.size())
+ break;
+ Index k0 = r++;
+ Index kPrev = k0;
+ mask.coeffRef(k0) = true;
+ for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
+ {
+ Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
+ .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+ (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
+ mask.coeffRef(k) = true;
+ kPrev = k;
+ }
+ }
+ }
+ else
+ {
+ for(Index i = 0; i < n; ++i)
+ {
+ Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+ (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
+ =
+ Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
+ (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
+ }
+ }
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
+ {
+ permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
+ {
+ permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
+ }
+};
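+// Transpositions are applied as successive row/column swaps, walked in
+// reverse order when the transposed (inverse) product is requested.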
+template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
+struct transposition_matrix_product
+{
+ typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
+ typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
+ template<typename Dest, typename TranspositionType>
+ static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
+ {
+ MatrixType mat(xpr);
+ typedef typename TranspositionType::StorageIndex StorageIndex;
+ const Index size = tr.size();
+ StorageIndex j = 0;
+ if(!is_same_dense(dst,mat))
+ dst = mat;
+ for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
+ if(Index(j=tr.coeff(k))!=k)
+ {
+ if(Side==OnTheLeft) dst.row(k).swap(dst.row(j));
+ else if(Side==OnTheRight) dst.col(k).swap(dst.col(j));
+ }
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
+ {
+ transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
+ }
+};
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
+{
+ template<typename Dest>
+ static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
+ {
+ transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
+ }
+};
+} // namespace internal
+} // namespace Eigen
+#endif // EIGEN_PRODUCTEVALUATORS_H
+// end #include "src/Core/ProductEvaluators.h"
+// #include "src/Core/products/GeneralMatrixVector.h"
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_H
+namespace Eigen {
+namespace internal {
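+// Column-major GEMV kernel: processes four columns per iteration, classifies
+// the relative alignment of the four lhs columns (All/Even/First/None
+// aligned) and, in the FirstAligned case, peels two result packets per step.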
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
+{
+ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+enum {
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
+};
+typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+typedef typename packet_traits<ResScalar>::type _ResPacket;
+typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+EIGEN_DONT_INLINE static void run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ RhsScalar alpha);
+};
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ RhsScalar alpha)
+{
+ EIGEN_UNUSED_VARIABLE(resIncr);
+ eigen_internal_assert(resIncr==1);
+ #ifdef _EIGEN_ACCUMULATE_PACKETS
+ #error _EIGEN_ACCUMULATE_PACKETS has already been defined
+ #endif
+ #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \
+ pstore(&res[j], \
+ padd(pload<ResPacket>(&res[j]), \
+ padd( \
+ padd(pcj.pmul(lhs0.template load<LhsPacket, Alignment0>(j), ptmp0), \
+ pcj.pmul(lhs1.template load<LhsPacket, Alignment13>(j), ptmp1)), \
+ padd(pcj.pmul(lhs2.template load<LhsPacket, Alignment2>(j), ptmp2), \
+ pcj.pmul(lhs3.template load<LhsPacket, Alignment13>(j), ptmp3)) )))
+ typedef typename LhsMapper::VectorMapper LhsScalars;
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+ if(ConjugateRhs)
+ alpha = numext::conj(alpha);
+ enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
+ const Index columnsAtOnce = 4;
+ const Index peels = 2;
+ const Index LhsPacketAlignedMask = LhsPacketSize-1;
+ const Index ResPacketAlignedMask = ResPacketSize-1;
+ const Index size = rows;
+ const Index lhsStride = lhs.stride();
+ Index alignedStart = internal::first_default_aligned(res,size);
+ Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
+ const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
+ Index alignmentPattern = alignmentStep==0 ? AllAligned
+ : alignmentStep==(LhsPacketSize/2) ? EvenAligned
+ : FirstAligned;
+ const Index lhsAlignmentOffset = lhs.firstAligned(size);
+ Index skipColumns = 0;
+ if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (UIntPtr(res)%sizeof(ResScalar)) )
+ {
+ alignedSize = 0;
+ alignedStart = 0;
+ alignmentPattern = NoneAligned;
+ }
+ else if(LhsPacketSize > 4)
+ {
+ alignmentPattern = NoneAligned;
+ }
+ else if (LhsPacketSize>1)
+ {
+ while (skipColumns<LhsPacketSize &&
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
+ ++skipColumns;
+ if (skipColumns==LhsPacketSize)
+ {
+ alignmentPattern = NoneAligned;
+ skipColumns = 0;
+ }
+ else
+ {
+ skipColumns = (std::min)(skipColumns,cols);
+ }
+ }
+ else if(Vectorizable)
+ {
+ alignedStart = 0;
+ alignedSize = size;
+ alignmentPattern = AllAligned;
+ }
+ const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
+ const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+ Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
+ for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
+ {
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(i, 0)),
+ ptmp1 = pset1<RhsPacket>(alpha*rhs(i+offset1, 0)),
+ ptmp2 = pset1<RhsPacket>(alpha*rhs(i+2, 0)),
+ ptmp3 = pset1<RhsPacket>(alpha*rhs(i+offset3, 0));
+ const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0), lhs1 = lhs.getVectorMapper(0, i+offset1),
+ lhs2 = lhs.getVectorMapper(0, i+2), lhs3 = lhs.getVectorMapper(0, i+offset3);
+ if (Vectorizable)
+ {
+ for (Index j=0; j<alignedStart; ++j)
+ {
+ res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
+ res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
+ res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
+ res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
+ }
+ if (alignedSize>alignedStart)
+ {
+ switch(alignmentPattern)
+ {
+ case AllAligned:
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
+ break;
+ case EvenAligned:
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
+ break;
+ case FirstAligned:
+ {
+ Index j = alignedStart;
+ if(peels>1)
+ {
+ LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
+ ResPacket T0, T1;
+ A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
+ A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
+ A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
+ for (; j<peeledSize; j+=peels*ResPacketSize)
+ {
+ A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11);
+ A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12);
+ A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13);
+ A00 = lhs0.template load<LhsPacket, Aligned>(j);
+ A10 = lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize);
+ T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
+ T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
+ T0 = pcj.pmadd(A01, ptmp1, T0);
+ A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01);
+ T0 = pcj.pmadd(A02, ptmp2, T0);
+ A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02);
+ T0 = pcj.pmadd(A03, ptmp3, T0);
+ pstore(&res[j],T0);
+ A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03);
+ T1 = pcj.pmadd(A11, ptmp1, T1);
+ T1 = pcj.pmadd(A12, ptmp2, T1);
+ T1 = pcj.pmadd(A13, ptmp3, T1);
+ pstore(&res[j+ResPacketSize],T1);
+ }
+ }
+ for (; j<alignedSize; j+=ResPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
+ break;
+ }
+ default:
+ for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
+ break;
+ }
+ }
+ }
+ for (Index j=alignedSize; j<size; ++j)
+ {
+ res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
+ res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
+ res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
+ res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
+ }
+ }
+ Index end = cols;
+ Index start = columnBound;
+ do
+ {
+ for (Index k=start; k<end; ++k)
+ {
+ RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(k, 0));
+ const LhsScalars lhs0 = lhs.getVectorMapper(0, k);
+ if (Vectorizable)
+ {
+ for (Index j=0; j<alignedStart; ++j)
+ res[j] += cj.pmul(lhs0(j), pfirst(ptmp0));
+ if (lhs0.template aligned<LhsPacket>(alignedStart))
+ for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
+ pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(i), ptmp0, pload<ResPacket>(&res[i])));
+ else
+ for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
+ pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(i), ptmp0, pload<ResPacket>(&res[i])));
+ }
+ for (Index i=alignedSize; i<size; ++i)
+ res[i] += cj.pmul(lhs0(i), pfirst(ptmp0));
+ }
+ if (skipColumns)
+ {
+ start = 0;
+ end = skipColumns;
+ skipColumns = 0;
+ }
+ else
+ break;
+ } while(Vectorizable);
+ #undef _EIGEN_ACCUMULATE_PACKETS
+}
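+// Row-major GEMV kernel: accumulates four dot products (four lhs rows against
+// the rhs vector) per iteration in packets, reducing with predux before the
+// alpha-scaled results are added to res.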
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
+{
+typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+enum {
+ Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+ && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+ LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+ RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+ ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
+};
+typedef typename packet_traits<LhsScalar>::type _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type _RhsPacket;
+typedef typename packet_traits<ResScalar>::type _ResPacket;
+typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+EIGEN_DONT_INLINE static void run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ ResScalar alpha);
+};
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsMapper& lhs,
+ const RhsMapper& rhs,
+ ResScalar* res, Index resIncr,
+ ResScalar alpha)
+{
+ eigen_internal_assert(rhs.stride()==1);
+ #ifdef _EIGEN_ACCUMULATE_PACKETS
+ #error _EIGEN_ACCUMULATE_PACKETS has already been defined
+ #endif
+ #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\
+ RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); \
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Alignment0>(j), b, ptmp0); \
+ ptmp1 = pcj.pmadd(lhs1.template load<LhsPacket, Alignment13>(j), b, ptmp1); \
+ ptmp2 = pcj.pmadd(lhs2.template load<LhsPacket, Alignment2>(j), b, ptmp2); \
+ ptmp3 = pcj.pmadd(lhs3.template load<LhsPacket, Alignment13>(j), b, ptmp3); }
+ conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+ conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+ typedef typename LhsMapper::VectorMapper LhsScalars;
+ enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
+ const Index rowsAtOnce = 4;
+ const Index peels = 2;
+ const Index RhsPacketAlignedMask = RhsPacketSize-1;
+ const Index LhsPacketAlignedMask = LhsPacketSize-1;
+ const Index depth = cols;
+ const Index lhsStride = lhs.stride();
+ Index alignedStart = rhs.firstAligned(depth);
+ Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
+ const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
+ Index alignmentPattern = alignmentStep==0 ? AllAligned
+ : alignmentStep==(LhsPacketSize/2) ? EvenAligned
+ : FirstAligned;
+ const Index lhsAlignmentOffset = lhs.firstAligned(depth);
+ const Index rhsAlignmentOffset = rhs.firstAligned(rows);
+ Index skipRows = 0;
+ if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) ||
+ (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) ||
+ (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) )
+ {
+ alignedSize = 0;
+ alignedStart = 0;
+ alignmentPattern = NoneAligned;
+ }
+ else if(LhsPacketSize > 4)
+ {
+ alignmentPattern = NoneAligned;
+ }
+ else if (LhsPacketSize>1)
+ {
+ while (skipRows<LhsPacketSize &&
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
+ ++skipRows;
+ if (skipRows==LhsPacketSize)
+ {
+ alignmentPattern = NoneAligned;
+ skipRows = 0;
+ }
+ else
+ {
+ skipRows = (std::min)(skipRows,Index(rows));
+ }
+ }
+ else if(Vectorizable)
+ {
+ alignedStart = 0;
+ alignedSize = depth;
+ alignmentPattern = AllAligned;
+ }
+ const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
+ const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+ Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
+ for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
+ {
+ EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
+ ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
+ const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0), lhs1 = lhs.getVectorMapper(i+offset1, 0),
+ lhs2 = lhs.getVectorMapper(i+2, 0), lhs3 = lhs.getVectorMapper(i+offset3, 0);
+ if (Vectorizable)
+ {
+ ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
+ ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
+ for (Index j=0; j<alignedStart; ++j)
+ {
+ RhsScalar b = rhs(j, 0);
+ tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
+ tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
+ }
+ if (alignedSize>alignedStart)
+ {
+ switch(alignmentPattern)
+ {
+ case AllAligned:
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
+ break;
+ case EvenAligned:
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
+ break;
+ case FirstAligned:
+ {
+ Index j = alignedStart;
+ if (peels>1)
+ {
+ LhsPacket A01, A02, A03, A11, A12, A13;
+ A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
+ A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
+ A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
+ for (; j<peeledSize; j+=peels*RhsPacketSize)
+ {
+ RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0);
+ A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11);
+ A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12);
+ A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13);
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), b, ptmp0);
+ ptmp1 = pcj.pmadd(A01, b, ptmp1);
+ A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01);
+ ptmp2 = pcj.pmadd(A02, b, ptmp2);
+ A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02);
+ ptmp3 = pcj.pmadd(A03, b, ptmp3);
+ A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03);
+ b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load<RhsPacket, Aligned>(0);
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize), b, ptmp0);
+ ptmp1 = pcj.pmadd(A11, b, ptmp1);
+ ptmp2 = pcj.pmadd(A12, b, ptmp2);
+ ptmp3 = pcj.pmadd(A13, b, ptmp3);
+ }
+ }
+ for (; j<alignedSize; j+=RhsPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
+ break;
+ }
+ default:
+ for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+ _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
+ break;
+ }
+ tmp0 += predux(ptmp0);
+ tmp1 += predux(ptmp1);
+ tmp2 += predux(ptmp2);
+ tmp3 += predux(ptmp3);
+ }
+ }
+ for (Index j=alignedSize; j<depth; ++j)
+ {
+ RhsScalar b = rhs(j, 0);
+ tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
+ tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
+ }
+ res[i*resIncr] += alpha*tmp0;
+ res[(i+offset1)*resIncr] += alpha*tmp1;
+ res[(i+2)*resIncr] += alpha*tmp2;
+ res[(i+offset3)*resIncr] += alpha*tmp3;
+ }
+ Index end = rows;
+ Index start = rowBound;
+ do
+ {
+ for (Index i=start; i<end; ++i)
+ {
+ EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
+ ResPacket ptmp0 = pset1<ResPacket>(tmp0);
+ const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
+ for (Index j=0; j<alignedStart; ++j)
+ tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
+ if (alignedSize>alignedStart)
+ {
+ if (lhs0.template aligned<LhsPacket>(alignedStart))
+ for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
+ else
+ for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
+ ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
+ tmp0 += predux(ptmp0);
+ }
+ for (Index j=alignedSize; j<depth; ++j)
+ tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
+ res[i*resIncr] += alpha*tmp0;
+ }
+ if (skipRows)
+ {
+ start = 0;
+ end = skipRows;
+ skipRows = 0;
+ }
+ else
+ break;
+ } while(Vectorizable);
+ #undef _EIGEN_ACCUMULATE_PACKETS
+}
+} // namespace internal
+} // namespace Eigen
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_H
+// end #include "src/Core/products/GeneralMatrixVector.h"
+// #include "src/Core/products/GeneralMatrixMatrix.h"
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_H
+namespace Eigen {
+namespace internal {
+template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
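+// A row-major result is handled by swapping the operands and transposing the
+// problem so that the ColMajor specialization below does all the real work.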
+template<
+ typename Index,
+ typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
+{
+ typedef gebp_traits<RhsScalar,LhsScalar> Traits;
+ typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+ static EIGEN_STRONG_INLINE void run(
+ Index rows, Index cols, Index depth,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsStride,
+ ResScalar* res, Index resStride,
+ ResScalar alpha,
+ level3_blocking<RhsScalar,LhsScalar>& blocking,
+ GemmParallelInfo<Index>* info = 0)
+ {
+ general_matrix_matrix_product<Index,
+ RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
+ LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
+ ColMajor>
+ ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
+ }
+};
+template<
+ typename Index,
+ typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+ typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
+{
+typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+static void run(Index rows, Index cols, Index depth,
+ const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsStride,
+ ResScalar* _res, Index resStride,
+ ResScalar alpha,
+ level3_blocking<LhsScalar,RhsScalar>& blocking,
+ GemmParallelInfo<Index>* info = 0)
+{
+ typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
+ typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
+ typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
+ LhsMapper lhs(_lhs,lhsStride);
+ RhsMapper rhs(_rhs,rhsStride);
+ ResMapper res(_res, resStride);
+ Index kc = blocking.kc();
+ Index mc = (std::min)(rows,blocking.mc());
+ Index nc = (std::min)(cols,blocking.nc());
+ gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
+ gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
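+  // Classic three-level blocking: the depth is cut into panels of kc, the rows
+  // into blocks of mc, the columns into blocks of nc. pack_lhs/pack_rhs copy
+  // each panel into the contiguous buffers blockA/blockB, and the gebp
+  // micro-kernel accumulates alpha * blockA * blockB into res.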
+#ifdef EIGEN_HAS_OPENMP
+ if(info)
+ {
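+    // Parallel path: each thread packs its own horizontal panel of A into the
+    // shared blockA and publishes it by setting info[tid].sync = k; every
+    // thread then consumes all panels, and the info[i].users counter keeps a
+    // panel alive until the last consumer is done with it.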
+ int tid = omp_get_thread_num();
+ int threads = omp_get_num_threads();
+ LhsScalar* blockA = blocking.blockA();
+ eigen_internal_assert(blockA!=0);
+ std::size_t sizeB = kc*nc;
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
+ for(Index k=0; k<depth; k+=kc)
+ {
+ const Index actual_kc = (std::min)(k+kc,depth)-k;
+ pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);
+ while(info[tid].users!=0) {}
+ info[tid].users += threads;
+ pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
+ info[tid].sync = k;
+ for(int shift=0; shift<threads; ++shift)
+ {
+ int i = (tid+shift)%threads;
+ if (shift>0) {
+ while(info[i].sync!=k) {
+ }
+ }
+ gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
+ }
+ for(Index j=nc; j<cols; j+=nc)
+ {
+ const Index actual_nc = (std::min)(j+nc,cols)-j;
+ pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);
+ gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
+ }
+ for(Index i=0; i<threads; ++i)
+ info[i].users -= 1;
+ }
+ }
+ else
+#endif
+ {
+ EIGEN_UNUSED_VARIABLE(info);
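+    // Serial path: plain loop nest over (i2, k2, j2) blocks. When B fits in a
+    // single kc x nc panel (kc==depth && nc==cols) but A needs several row
+    // blocks, pack_rhs_once packs B on the first row-block pass only.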
+ std::size_t sizeA = kc*mc;
+ std::size_t sizeB = kc*nc;
+ ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+ ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
+ const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
+ for(Index i2=0; i2<rows; i2+=mc)
+ {
+ const Index actual_mc = (std::min)(i2+mc,rows)-i2;
+ for(Index k2=0; k2<depth; k2+=kc)
+ {
+ const Index actual_kc = (std::min)(k2+kc,depth)-k2;
+ pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
+ for(Index j2=0; j2<cols; j2+=nc)
+ {
+ const Index actual_nc = (std::min)(j2+nc,cols)-j2;
+ if((!pack_rhs_once) || i2==0)
+ pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
+ gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
+ }
+ }
+ }
+ }
+}
+};
+template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
+struct gemm_functor
+{
+ gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
+ : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
+ {}
+ void initParallelSession(Index num_threads) const
+ {
+ m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
+ m_blocking.allocateA();
+ }
+ void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
+ {
+ if(cols==-1)
+ cols = m_rhs.cols();
+ Gemm::run(rows, cols, m_lhs.cols(),
+ &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
+ &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
+ (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
+ m_actualAlpha, m_blocking, info);
+ }
+ typedef typename Gemm::Traits Traits;
+ protected:
+ const Lhs& m_lhs;
+ const Rhs& m_rhs;
+ Dest& m_dest;
+ Scalar m_actualAlpha;
+ BlockingType& m_blocking;
+};
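+// gemm_functor bundles the operands, the scalar factor and the blocking so
+// that parallelize_gemm can invoke operator()(row, rows, ...) on one slice of
+// rows per thread, or once with the full range in the serial case.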
+template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
+bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
+template<typename _LhsScalar, typename _RhsScalar>
+class level3_blocking
+{
+ typedef _LhsScalar LhsScalar;
+ typedef _RhsScalar RhsScalar;
+ protected:
+ LhsScalar* m_blockA;
+ RhsScalar* m_blockB;
+ Index m_mc;
+ Index m_nc;
+ Index m_kc;
+ public:
+ level3_blocking()
+ : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
+ {}
+ inline Index mc() const { return m_mc; }
+ inline Index nc() const { return m_nc; }
+ inline Index kc() const { return m_kc; }
+ inline LhsScalar* blockA() { return m_blockA; }
+ inline RhsScalar* blockB() { return m_blockB; }
+};
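+// level3_blocking is the storage-order-agnostic base class: it holds the
+// packing buffers blockA/blockB and the current block sizes mc/nc/kc.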
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true >
+ : public level3_blocking<
+ typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
+ typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
+{
+ enum {
+ Transpose = StorageOrder==RowMajor,
+ ActualRows = Transpose ? MaxCols : MaxRows,
+ ActualCols = Transpose ? MaxRows : MaxCols
+ };
+ typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
+ typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ enum {
+ SizeA = ActualRows * MaxDepth,
+ SizeB = ActualCols * MaxDepth
+ };
+#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
+ EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
+ EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
+#else
+ EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
+ EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
+#endif
+ public:
+ gemm_blocking_space(Index , Index , Index , Index , bool )
+ {
+ this->m_mc = ActualRows;
+ this->m_nc = ActualCols;
+ this->m_kc = MaxDepth;
+#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
+ this->m_blockA = m_staticA;
+ this->m_blockB = m_staticB;
+#else
+ this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
+ this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
+#endif
+ }
+ void initParallel(Index, Index, Index, Index)
+ {}
+ inline void allocateA() {}
+ inline void allocateB() {}
+ inline void allocateAll() {}
+};
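+// Specialization for products whose dimensions are all fixed at compile time:
+// the packing buffers are static arrays (aligned by hand when over-alignment
+// of the object itself cannot be guaranteed), so nothing is heap-allocated.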
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
+ : public level3_blocking<
+ typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
+ typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
+{
+ enum {
+ Transpose = StorageOrder==RowMajor
+ };
+ typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
+ typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ Index m_sizeA;
+ Index m_sizeB;
+ public:
+ gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
+ {
+ this->m_mc = Transpose ? cols : rows;
+ this->m_nc = Transpose ? rows : cols;
+ this->m_kc = depth;
+ if(l3_blocking)
+ {
+ computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
+ }
+ else
+ {
+ Index n = this->m_nc;
+ computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
+ }
+ m_sizeA = this->m_mc * this->m_kc;
+ m_sizeB = this->m_kc * this->m_nc;
+ }
+ void initParallel(Index rows, Index cols, Index depth, Index num_threads)
+ {
+ this->m_mc = Transpose ? cols : rows;
+ this->m_nc = Transpose ? rows : cols;
+ this->m_kc = depth;
+ eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
+ Index m = this->m_mc;
+ computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
+ m_sizeA = this->m_mc * this->m_kc;
+ m_sizeB = this->m_kc * this->m_nc;
+ }
+ void allocateA()
+ {
+ if(this->m_blockA==0)
+ this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
+ }
+ void allocateB()
+ {
+ if(this->m_blockB==0)
+ this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
+ }
+ void allocateAll()
+ {
+ allocateA();
+ allocateB();
+ }
+ ~gemm_blocking_space()
+ {
+ aligned_delete(this->m_blockA, m_sizeA);
+ aligned_delete(this->m_blockB, m_sizeB);
+ }
+};
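+// Specialization for runtime-sized products: kc/mc/nc are derived from the
+// cache sizes by computeProductBlockingSizes, and blockA/blockB are allocated
+// lazily through allocateA/allocateB and released in the destructor.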
+}
+namespace internal {
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+ : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
+{
+ typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+ typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+ typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+ enum {
+ MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
+ };
+ typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
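+  // Heuristic: when the sum of the three product dimensions is below 20, the
+  // coefficient-based (lazy) product is cheaper than packing plus the gebp
+  // kernel, so the three evaluators below fall back to lazyproduct.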
+ template<typename Dst>
+ static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ lazyproduct::evalTo(dst, lhs, rhs);
+ else
+ {
+ dst.setZero();
+ scaleAndAddTo(dst, lhs, rhs, Scalar(1));
+ }
+ }
+ template<typename Dst>
+ static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ lazyproduct::addTo(dst, lhs, rhs);
+ else
+ scaleAndAddTo(dst,lhs, rhs, Scalar(1));
+ }
+ template<typename Dst>
+ static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ {
+ if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ lazyproduct::subTo(dst, lhs, rhs);
+ else
+ scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
+ }
+ template<typename Dest>
+ static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
+ {
+ eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
+ if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
+ return;
+ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+ typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+ * RhsBlasTraits::extractScalarFactor(a_rhs);
+ typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
+ Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
+ typedef internal::gemm_functor<
+ Scalar, Index,
+ internal::general_matrix_matrix_product<
+ Index,
+ LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
+ RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
+ (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
+ ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
+ BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
+ internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
+ (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
+ }
+};
+}
+}
+#endif
+// end #include "src/Core/products/GeneralMatrixMatrix.h"
+// #include "src/Core/VectorwiseOp.h"
+#ifndef EIGEN_PARTIAL_REDUX_H
+#define EIGEN_PARTIAL_REDUX_H
+namespace Eigen {
+template< typename MatrixType, typename MemberOp, int Direction>
+class PartialReduxExpr;
+namespace internal {
+template<typename MatrixType, typename MemberOp, int Direction>
+struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
+ : traits<MatrixType>
+{
+ typedef typename MemberOp::result_type Scalar;
+ typedef typename traits<MatrixType>::StorageKind StorageKind;
+ typedef typename traits<MatrixType>::XprKind XprKind;
+ typedef typename MatrixType::Scalar InputScalar;
+ enum {
+ RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
+ MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
+ Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
+ TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
+ };
+};
+}
+template< typename MatrixType, typename MemberOp, int Direction>
+class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type,
+ internal::no_assignment_operator
+{
+ public:
+ typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
+ EIGEN_DEVICE_FUNC
+ explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
+ : m_matrix(mat), m_functor(func) {}
+ EIGEN_DEVICE_FUNC
+ Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
+ EIGEN_DEVICE_FUNC
+ Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
+ EIGEN_DEVICE_FUNC
+ typename MatrixType::Nested nestedExpression() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC
+ const MemberOp& functor() const { return m_functor; }
+ protected:
+ typename MatrixType::Nested m_matrix;
+ const MemberOp m_functor;
+};
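+// PartialReduxExpr is the expression returned by the partial reductions of
+// VectorwiseOp below; e.g. mat.colwise().sum() builds a
+// PartialReduxExpr<MatrixType, internal::member_sum<...>, Vertical> that
+// evaluates one reduction per column.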
+#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
+ template <typename ResultType> \
+ struct member_##MEMBER { \
+ EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
+ typedef ResultType result_type; \
+ template<typename Scalar, int Size> struct Cost \
+ { enum { value = COST }; }; \
+ template<typename XprType> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+ ResultType operator()(const XprType& mat) const \
+ { return mat.MEMBER(); } \
+ }
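+// EIGEN_MEMBER_FUNCTOR stamps out one functor per member reduction (sum,
+// norm, ...); each carries a rough compile-time Cost estimate that feeds
+// Eigen's expression cost model.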
+namespace internal {
+EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );
+EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
+EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
+template <int p, typename ResultType>
+struct member_lpnorm {
+ typedef ResultType result_type;
+ template<typename Scalar, int Size> struct Cost
+ { enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
+ EIGEN_DEVICE_FUNC member_lpnorm() {}
+ template<typename XprType>
+ EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
+ { return mat.template lpNorm<p>(); }
+};
+template <typename BinaryOp, typename Scalar>
+struct member_redux {
+ typedef typename result_of<
+ BinaryOp(const Scalar&,const Scalar&)
+ >::type result_type;
+ template<typename _Scalar, int Size> struct Cost
+ { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
+ EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
+ template<typename Derived>
+ EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const
+ { return mat.redux(m_functor); }
+ const BinaryOp m_functor;
+};
+}
+template<typename ExpressionType, int Direction> class VectorwiseOp
+{
+ public:
+ typedef typename ExpressionType::Scalar Scalar;
+ typedef typename ExpressionType::RealScalar RealScalar;
+ typedef Eigen::Index Index;
+ typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
+ typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
+ template<template<typename _Scalar> class Functor,
+ typename Scalar_=Scalar> struct ReturnType
+ {
+ typedef PartialReduxExpr<ExpressionType,
+ Functor<Scalar_>,
+ Direction
+ > Type;
+ };
+ template<typename BinaryOp> struct ReduxReturnType
+ {
+ typedef PartialReduxExpr<ExpressionType,
+ internal::member_redux<BinaryOp,Scalar>,
+ Direction
+ > Type;
+ };
+ enum {
+ isVertical = (Direction==Vertical) ? 1 : 0,
+ isHorizontal = (Direction==Horizontal) ? 1 : 0
+ };
+ protected:
+ typedef typename internal::conditional<isVertical,
+ typename ExpressionType::ColXpr,
+ typename ExpressionType::RowXpr>::type SubVector;
+ EIGEN_DEVICE_FUNC
+ SubVector subVector(Index i)
+ {
+ return SubVector(m_matrix.derived(),i);
+ }
+ EIGEN_DEVICE_FUNC
+ Index subVectors() const
+ { return isVertical?m_matrix.cols():m_matrix.rows(); }
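+    // extendedTo replicates a vector so there is one copy per subvector (e.g.
+    // a column vector copied across every column for colwise() operations);
+    // extendedToOpposite replicates a per-subvector result, such as the row
+    // of column norms that normalized() divides by, along each subvector.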
+ template<typename OtherDerived> struct ExtendedType {
+ typedef Replicate<OtherDerived,
+ isVertical ? 1 : ExpressionType::RowsAtCompileTime,
+ isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ typename ExtendedType<OtherDerived>::Type
+ extendedTo(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename ExtendedType<OtherDerived>::Type
+ (other.derived(),
+ isVertical ? 1 : m_matrix.rows(),
+ isHorizontal ? 1 : m_matrix.cols());
+ }
+ template<typename OtherDerived> struct OppositeExtendedType {
+ typedef Replicate<OtherDerived,
+ isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
+ isVertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ typename OppositeExtendedType<OtherDerived>::Type
+ extendedToOpposite(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename OppositeExtendedType<OtherDerived>::Type
+ (other.derived(),
+ isHorizontal ? 1 : m_matrix.rows(),
+ isVertical ? 1 : m_matrix.cols());
+ }
+ public:
+ EIGEN_DEVICE_FUNC
+ explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
+ EIGEN_DEVICE_FUNC
+ inline const ExpressionType& _expression() const { return m_matrix; }
+ template<typename BinaryOp>
+ EIGEN_DEVICE_FUNC
+ const typename ReduxReturnType<BinaryOp>::Type
+ redux(const BinaryOp& func = BinaryOp()) const
+ { return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
+ typedef typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType;
+ typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType;
+ typedef typename ReturnType<internal::member_squaredNorm,RealScalar>::Type SquaredNormReturnType;
+ typedef typename ReturnType<internal::member_norm,RealScalar>::Type NormReturnType;
+ typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType;
+ typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType;
+ typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType;
+ typedef typename ReturnType<internal::member_sum>::Type SumReturnType;
+ typedef typename ReturnType<internal::member_mean>::Type MeanReturnType;
+ typedef typename ReturnType<internal::member_all>::Type AllReturnType;
+ typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
+ typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
+ typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
+ typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;
+ typedef Reverse<ExpressionType, Direction> ReverseReturnType;
+ template<int p> struct LpNormReturnType {
+ typedef PartialReduxExpr<ExpressionType, internal::member_lpnorm<p,RealScalar>,Direction> Type;
+ };
+ EIGEN_DEVICE_FUNC
+ const MinCoeffReturnType minCoeff() const
+ { return MinCoeffReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const MaxCoeffReturnType maxCoeff() const
+ { return MaxCoeffReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const SquaredNormReturnType squaredNorm() const
+ { return SquaredNormReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const NormReturnType norm() const
+ { return NormReturnType(_expression()); }
+ template<int p>
+ EIGEN_DEVICE_FUNC
+ const typename LpNormReturnType<p>::Type lpNorm() const
+ { return typename LpNormReturnType<p>::Type(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const BlueNormReturnType blueNorm() const
+ { return BlueNormReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const StableNormReturnType stableNorm() const
+ { return StableNormReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const HypotNormReturnType hypotNorm() const
+ { return HypotNormReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const SumReturnType sum() const
+ { return SumReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const MeanReturnType mean() const
+ { return MeanReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const AllReturnType all() const
+ { return AllReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const AnyReturnType any() const
+ { return AnyReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const CountReturnType count() const
+ { return CountReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const ProdReturnType prod() const
+ { return ProdReturnType(_expression()); }
+ EIGEN_DEVICE_FUNC
+ const ConstReverseReturnType reverse() const
+ { return ConstReverseReturnType( _expression() ); }
+ EIGEN_DEVICE_FUNC
+ ReverseReturnType reverse()
+ { return ReverseReturnType( _expression() ); }
+ typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
+ EIGEN_DEVICE_FUNC
+ const ReplicateReturnType replicate(Index factor) const;
+  template<int Factor>
+  EIGEN_DEVICE_FUNC
+  const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>
+  replicate(Index factor = Factor) const
+ {
+ return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>
+ (_expression(),isVertical?factor:1,isHorizontal?factor:1);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ m_matrix *= extendedTo(other.derived());
+ return const_cast<ExpressionType&>(m_matrix);
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ m_matrix /= extendedTo(other.derived());
+ return const_cast<ExpressionType&>(m_matrix);
+ }
+ template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator+(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix + extendedTo(other.derived());
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator-(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix - extendedTo(other.derived());
+ }
+ template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_product_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator*(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix * extendedTo(other.derived());
+ }
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename ExtendedType<OtherDerived>::Type>
+ operator/(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+ EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+ EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+ return m_matrix / extendedTo(other.derived());
+ }
+ EIGEN_DEVICE_FUNC
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
+ normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
+ EIGEN_DEVICE_FUNC void normalize() {
+ m_matrix = this->normalized();
+ }
+ EIGEN_DEVICE_FUNC inline void reverseInPlace();
+ typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
+ EIGEN_DEVICE_FUNC
+ HomogeneousReturnType homogeneous() const;
+ typedef typename ExpressionType::PlainObject CrossReturnType;
+ template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
+ const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
+ enum {
+ HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime
+ : internal::traits<ExpressionType>::ColsAtCompileTime,
+ HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
+ };
+ typedef Block<const ExpressionType,
+ Direction==Vertical ? int(HNormalized_SizeMinusOne)
+ : int(internal::traits<ExpressionType>::RowsAtCompileTime),
+ Direction==Horizontal ? int(HNormalized_SizeMinusOne)
+ : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
+ HNormalized_Block;
+ typedef Block<const ExpressionType,
+ Direction==Vertical ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
+ Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
+ HNormalized_Factors;
+ typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,
+ const HNormalized_Block,
+ const Replicate<HNormalized_Factors,
+ Direction==Vertical ? HNormalized_SizeMinusOne : 1,
+ Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
+ HNormalizedReturnType;
+ EIGEN_DEVICE_FUNC
+ const HNormalizedReturnType hnormalized() const;
+ protected:
+ ExpressionTypeNested m_matrix;
+};
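+// Illustrative uses of the API implemented by VectorwiseOp and the
+// colwise()/rowwise() entry points below (assuming a 3x4 MatrixXd m and a
+// Vector3d v):
+//   m.colwise() += v;       // adds v to every column (operator+= above)
+//   m.colwise().norm();     // 1x4 expression of per-column norms
+//   m.rowwise().reverse();  // expression with each row reversed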
+template<typename Derived>
+inline typename DenseBase<Derived>::ColwiseReturnType
+DenseBase<Derived>::colwise()
+{
+ return ColwiseReturnType(derived());
+}
+template<typename Derived>
+inline typename DenseBase<Derived>::RowwiseReturnType
+DenseBase<Derived>::rowwise()
+{
+ return RowwiseReturnType(derived());
+}
+}
+#endif
+// end #include "src/Core/VectorwiseOp.h"
+// #include "src/Core/Replicate.h"
+#ifndef EIGEN_REPLICATE_H
+#define EIGEN_REPLICATE_H
+namespace Eigen {
+namespace internal {
+template<typename MatrixType,int RowFactor,int ColFactor>
+struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
+ : traits<MatrixType>
+{
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename traits<MatrixType>::StorageKind StorageKind;
+ typedef typename traits<MatrixType>::XprKind XprKind;
+ typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+ typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+ enum {
+ RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
+ ? Dynamic
+ : RowFactor * MatrixType::RowsAtCompileTime,
+ ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic
+ ? Dynamic
+ : ColFactor * MatrixType::ColsAtCompileTime,
+ MaxRowsAtCompileTime = RowsAtCompileTime,
+ MaxColsAtCompileTime = ColsAtCompileTime,
+ IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
+ : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
+ : (MatrixType::Flags & RowMajorBit) ? 1 : 0,
+ Flags = IsRowMajor ? RowMajorBit : 0
+ };
+};
+}
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
+ : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
+{
+ typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
+ typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
+ public:
+ typedef typename internal::dense_xpr_base<Replicate>::type Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
+ typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+ template<typename OriginalMatrixType>
+ EIGEN_DEVICE_FUNC
+ inline explicit Replicate(const OriginalMatrixType& matrix)
+ : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
+ eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
+ }
+ template<typename OriginalMatrixType>
+ EIGEN_DEVICE_FUNC
+ inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
+ : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
+ {
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
+ }
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
+ EIGEN_DEVICE_FUNC
+ const _MatrixTypeNested& nestedExpression() const
+ {
+ return m_matrix;
+ }
+ protected:
+ MatrixTypeNested m_matrix;
+ const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
+ const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
+};
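+// Replicate tiles its nested expression: for instance v.replicate<3,1>() (via
+// the DenseBase::replicate() overload below) stacks three copies of v
+// vertically, and m.colwise().replicate(n) repeats the whole block n times
+// vertically through the VectorwiseOp overload.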
+template<typename Derived>
+template<int RowFactor, int ColFactor>
+const Replicate<Derived,RowFactor,ColFactor>
+DenseBase<Derived>::replicate() const
+{
+ return Replicate<Derived,RowFactor,ColFactor>(derived());
+}
+template<typename ExpressionType, int Direction>
+const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
+VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const
+{
+ return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
+ (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
+}
+}
+#endif
+// end #include "src/Core/Replicate.h"
+// #include "src/Core/ArrayWrapper.h"
+#ifndef EIGEN_ARRAYWRAPPER_H
+#define EIGEN_ARRAYWRAPPER_H
+namespace Eigen {
+namespace internal {
+template<typename ExpressionType>
+struct traits<ArrayWrapper<ExpressionType> >
+ : public traits<typename remove_all<typename ExpressionType::Nested>::type >
+{
+ typedef ArrayXpr XprKind;
+ enum {
+ Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
+ LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
+ Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
+ };
+};
+}
+template<typename ExpressionType>
+class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
+{
+ public:
+ typedef ArrayBase<ArrayWrapper> Base;
+ EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
+ typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
+ typedef typename internal::conditional<
+ internal::is_lvalue<ExpressionType>::value,
+ Scalar,
+ const Scalar
+ >::type ScalarWithConstIfNotLvalue;
+ typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
+ using Base::coeffRef;
+ EIGEN_DEVICE_FUNC
+ explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+ EIGEN_DEVICE_FUNC
+ inline Index rows() const { return m_expression.rows(); }
+ EIGEN_DEVICE_FUNC
+ inline Index cols() const { return m_expression.cols(); }
+ EIGEN_DEVICE_FUNC
+ inline Index outerStride() const { return m_expression.outerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline Index innerStride() const { return m_expression.innerStride(); }
+ EIGEN_DEVICE_FUNC
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar* data() const { return m_expression.data(); }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index rowId, Index colId) const
+ {
+ return m_expression.coeffRef(rowId, colId);
+ }
+ EIGEN_DEVICE_FUNC
+ inline const Scalar& coeffRef(Index index) const
+ {
+ return m_expression.coeffRef(index);
+ }
+ template<typename Dest>
+ EIGEN_DEVICE_FUNC
+ inline void evalTo(Dest& dst) const { dst = m_expression; }
+    EIGEN_DEVICE_FUNC
+    const typename internal::remove_all<NestedExpressionType>::type&
+    nestedExpression() const
+ {
+ return m_expression;
+ }
+ EIGEN_DEVICE_FUNC
+ void resize(Index newSize) { m_expression.resize(newSize); }
+ EIGEN_DEVICE_FUNC
+ void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
+ protected:
+ NestedExpressionType m_expression;
+};
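+// ArrayWrapper is what MatrixBase::array() returns: it re-labels a matrix
+// expression with ArrayXpr (coefficient-wise) semantics without copying,
+// forwarding rows/cols, strides and coeffRef straight to the wrapped
+// expression.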
+}
+#endif
+// end #include "src/Core/ArrayWrapper.h"
+// #include "src/Core/SelfCwiseBinaryOp.h"
+namespace Eigen {
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
+{
+ internal::call_assignment(this->derived(), PlainObject::Constant(rows(), cols(),other), internal::div_assign_op<Scalar, Scalar>());
+ return derived();
+}
+}
+// end #include "src/Core/SelfCwiseBinaryOp.h"