summaryrefslogtreecommitdiff
path: root/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/nn/depend/external/eigen/Eigen/src/SparseLU')
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU.h775
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLUImpl.h66
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Memory.h226
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Structs.h110
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h301
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Utils.h80
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h181
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h179
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h107
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h280
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h126
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h130
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h223
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h258
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h137
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h136
-rw-r--r--runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h83
17 files changed, 3398 insertions, 0 deletions
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU.h
new file mode 100644
index 000000000..f883ab383
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU.h
@@ -0,0 +1,775 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_SPARSE_LU_H
+#define EIGEN_SPARSE_LU_H
+
+namespace Eigen {
+
+template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::StorageIndex> > class SparseLU;
+template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;
+template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType;
+
+/** \ingroup SparseLU_Module
+ * \class SparseLU
+ *
+ * \brief Sparse supernodal LU factorization for general matrices
+ *
+ * This class implements the supernodal LU factorization for general matrices.
+ * It uses the main techniques from the sequential SuperLU package
+ * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real
+ * and complex arithmetics with single and double precision, depending on the
+ * scalar type of your input matrix.
+ * The code has been optimized to provide BLAS-3 operations during supernode-panel updates.
+ * It benefits directly from the built-in high-performant Eigen BLAS routines.
+ * Moreover, when the size of a supernode is very small, the BLAS calls are avoided to
+ * enable a better optimization from the compiler. For best performance,
+ * you should compile it with NDEBUG flag to avoid the numerous bounds checking on vectors.
+ *
+ * An important parameter of this class is the ordering method. It is used to reorder the columns
+ * (and eventually the rows) of the matrix to reduce the number of new elements that are created during
+ * numerical factorization. The cheapest method available is COLAMD.
+ * See \link OrderingMethods_Module the OrderingMethods module \endlink for the list of
+ * built-in and external ordering methods.
+ *
+ * Simple example with key steps
+ * \code
+ * VectorXd x(n), b(n);
+ * SparseMatrix<double, ColMajor> A;
+ * SparseLU<SparseMatrix<scalar, ColMajor>, COLAMDOrdering<Index> > solver;
+ * // fill A and b;
+ * // Compute the ordering permutation vector from the structural pattern of A
+ * solver.analyzePattern(A);
+ * // Compute the numerical factorization
+ * solver.factorize(A);
+ * //Use the factors to solve the linear system
+ * x = solver.solve(b);
+ * \endcode
+ *
+ * \warning The input matrix A should be in a \b compressed and \b column-major form.
+ * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.
+ *
+ * \note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix.
+ * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization.
+ * If this is the case for your matrices, you can try the basic scaling method at
+ * "unsupported/Eigen/src/IterativeSolvers/Scaling.h"
+ *
+ * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<>
+ * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLMAD
+ *
+ * \implsparsesolverconcept
+ *
+ * \sa \ref TutorialSparseSolverConcept
+ * \sa \ref OrderingMethods_Module
+ */
+template <typename _MatrixType, typename _OrderingType>
+class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::StorageIndex>
+{
+ protected:
+ typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;
+ using APIBase::m_isInitialized;
+ public:
+ using APIBase::_solve_impl;
+
+ typedef _MatrixType MatrixType;
+ typedef _OrderingType OrderingType;
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::RealScalar RealScalar;
+ typedef typename MatrixType::StorageIndex StorageIndex;
+ typedef SparseMatrix<Scalar,ColMajor,StorageIndex> NCMatrix;
+ typedef internal::MappedSuperNodalMatrix<Scalar, StorageIndex> SCMatrix;
+ typedef Matrix<Scalar,Dynamic,1> ScalarVector;
+ typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
+ typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;
+ typedef internal::SparseLUImpl<Scalar, StorageIndex> Base;
+
+ enum {
+ ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+ MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+ };
+
+ public:
+ SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+ {
+ initperfvalues();
+ }
+ explicit SparseLU(const MatrixType& matrix)
+ : m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+ {
+ initperfvalues();
+ compute(matrix);
+ }
+
+ ~SparseLU()
+ {
+ // Free all explicit dynamic pointers
+ }
+
+ void analyzePattern (const MatrixType& matrix);
+ void factorize (const MatrixType& matrix);
+ void simplicialfactorize(const MatrixType& matrix);
+
+ /**
+ * Compute the symbolic and numeric factorization of the input sparse matrix.
+ * The input matrix should be in column-major storage.
+ */
+ void compute (const MatrixType& matrix)
+ {
+ // Analyze
+ analyzePattern(matrix);
+ //Factorize
+ factorize(matrix);
+ }
+
+ inline Index rows() const { return m_mat.rows(); }
+ inline Index cols() const { return m_mat.cols(); }
+ /** Indicate that the pattern of the input matrix is symmetric */
+ void isSymmetric(bool sym)
+ {
+ m_symmetricmode = sym;
+ }
+
+ /** \returns an expression of the matrix L, internally stored as supernodes
+ * The only operation available with this expression is the triangular solve
+ * \code
+ * y = b; matrixL().solveInPlace(y);
+ * \endcode
+ */
+ SparseLUMatrixLReturnType<SCMatrix> matrixL() const
+ {
+ return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore);
+ }
+ /** \returns an expression of the matrix U,
+ * The only operation available with this expression is the triangular solve
+ * \code
+ * y = b; matrixU().solveInPlace(y);
+ * \endcode
+ */
+ SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,StorageIndex> > matrixU() const
+ {
+ return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,StorageIndex> >(m_Lstore, m_Ustore);
+ }
+
+ /**
+ * \returns a reference to the row matrix permutation \f$ P_r \f$ such that \f$P_r A P_c^T = L U\f$
+ * \sa colsPermutation()
+ */
+ inline const PermutationType& rowsPermutation() const
+ {
+ return m_perm_r;
+ }
+ /**
+ * \returns a reference to the column matrix permutation\f$ P_c^T \f$ such that \f$P_r A P_c^T = L U\f$
+ * \sa rowsPermutation()
+ */
+ inline const PermutationType& colsPermutation() const
+ {
+ return m_perm_c;
+ }
+ /** Set the threshold used for a diagonal entry to be an acceptable pivot. */
+ void setPivotThreshold(const RealScalar& thresh)
+ {
+ m_diagpivotthresh = thresh;
+ }
+
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+ /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
+ *
+ * \warning the destination matrix X in X = this->solve(B) must be colmun-major.
+ *
+ * \sa compute()
+ */
+ template<typename Rhs>
+ inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;
+#endif // EIGEN_PARSED_BY_DOXYGEN
+
+ /** \brief Reports whether previous computation was successful.
+ *
+ * \returns \c Success if computation was succesful,
+ * \c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance
+ * \c InvalidInput if the input matrix is invalid
+ *
+ * \sa iparm()
+ */
+ ComputationInfo info() const
+ {
+ eigen_assert(m_isInitialized && "Decomposition is not initialized.");
+ return m_info;
+ }
+
+ /**
+ * \returns A string describing the type of error
+ */
+ std::string lastErrorMessage() const
+ {
+ return m_lastError;
+ }
+
+ template<typename Rhs, typename Dest>
+ bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
+ {
+ Dest& X(X_base.derived());
+ eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
+ EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
+ THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
+
+ // Permute the right hand side to form X = Pr*B
+ // on return, X is overwritten by the computed solution
+ X.resize(B.rows(),B.cols());
+
+ // this ugly const_cast_derived() helps to detect aliasing when applying the permutations
+ for(Index j = 0; j < B.cols(); ++j)
+ X.col(j) = rowsPermutation() * B.const_cast_derived().col(j);
+
+ //Forward substitution with L
+ this->matrixL().solveInPlace(X);
+ this->matrixU().solveInPlace(X);
+
+ // Permute back the solution
+ for (Index j = 0; j < B.cols(); ++j)
+ X.col(j) = colsPermutation().inverse() * X.col(j);
+
+ return true;
+ }
+
+ /**
+ * \returns the absolute value of the determinant of the matrix of which
+ * *this is the QR decomposition.
+ *
+ * \warning a determinant can be very big or small, so for matrices
+ * of large enough dimension, there is a risk of overflow/underflow.
+ * One way to work around that is to use logAbsDeterminant() instead.
+ *
+ * \sa logAbsDeterminant(), signDeterminant()
+ */
+ Scalar absDeterminant()
+ {
+ using std::abs;
+ eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
+ // Initialize with the determinant of the row matrix
+ Scalar det = Scalar(1.);
+ // Note that the diagonal blocks of U are stored in supernodes,
+ // which are available in the L part :)
+ for (Index j = 0; j < this->cols(); ++j)
+ {
+ for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
+ {
+ if(it.index() == j)
+ {
+ det *= abs(it.value());
+ break;
+ }
+ }
+ }
+ return det;
+ }
+
+ /** \returns the natural log of the absolute value of the determinant of the matrix
+ * of which **this is the QR decomposition
+ *
+ * \note This method is useful to work around the risk of overflow/underflow that's
+ * inherent to the determinant computation.
+ *
+ * \sa absDeterminant(), signDeterminant()
+ */
+ Scalar logAbsDeterminant() const
+ {
+ using std::log;
+ using std::abs;
+
+ eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
+ Scalar det = Scalar(0.);
+ for (Index j = 0; j < this->cols(); ++j)
+ {
+ for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
+ {
+ if(it.row() < j) continue;
+ if(it.row() == j)
+ {
+ det += log(abs(it.value()));
+ break;
+ }
+ }
+ }
+ return det;
+ }
+
+ /** \returns A number representing the sign of the determinant
+ *
+ * \sa absDeterminant(), logAbsDeterminant()
+ */
+ Scalar signDeterminant()
+ {
+ eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
+ // Initialize with the determinant of the row matrix
+ Index det = 1;
+ // Note that the diagonal blocks of U are stored in supernodes,
+ // which are available in the L part :)
+ for (Index j = 0; j < this->cols(); ++j)
+ {
+ for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
+ {
+ if(it.index() == j)
+ {
+ if(it.value()<0)
+ det = -det;
+ else if(it.value()==0)
+ return 0;
+ break;
+ }
+ }
+ }
+ return det * m_detPermR * m_detPermC;
+ }
+
+ /** \returns The determinant of the matrix.
+ *
+ * \sa absDeterminant(), logAbsDeterminant()
+ */
+ Scalar determinant()
+ {
+ eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
+ // Initialize with the determinant of the row matrix
+ Scalar det = Scalar(1.);
+ // Note that the diagonal blocks of U are stored in supernodes,
+ // which are available in the L part :)
+ for (Index j = 0; j < this->cols(); ++j)
+ {
+ for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
+ {
+ if(it.index() == j)
+ {
+ det *= it.value();
+ break;
+ }
+ }
+ }
+ return (m_detPermR * m_detPermC) > 0 ? det : -det;
+ }
+
+ protected:
+ // Functions
+ void initperfvalues()
+ {
+ m_perfv.panel_size = 16;
+ m_perfv.relax = 1;
+ m_perfv.maxsuper = 128;
+ m_perfv.rowblk = 16;
+ m_perfv.colblk = 8;
+ m_perfv.fillfactor = 20;
+ }
+
+ // Variables
+ mutable ComputationInfo m_info;
+ bool m_factorizationIsOk;
+ bool m_analysisIsOk;
+ std::string m_lastError;
+ NCMatrix m_mat; // The input (permuted ) matrix
+ SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
+ MappedSparseMatrix<Scalar,ColMajor,StorageIndex> m_Ustore; // The upper triangular matrix
+ PermutationType m_perm_c; // Column permutation
+ PermutationType m_perm_r ; // Row permutation
+ IndexVector m_etree; // Column elimination tree
+
+ typename Base::GlobalLU_t m_glu;
+
+ // SparseLU options
+ bool m_symmetricmode;
+ // values for performance
+ internal::perfvalues m_perfv;
+ RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot
+ Index m_nnzL, m_nnzU; // Nonzeros in L and U factors
+ Index m_detPermR, m_detPermC; // Determinants of the permutation matrices
+ private:
+ // Disable copy constructor
+ SparseLU (const SparseLU& );
+
+}; // End class SparseLU
+
+
+
+// Functions needed by the anaysis phase
+/**
+ * Compute the column permutation to minimize the fill-in
+ *
+ * - Apply this permutation to the input matrix -
+ *
+ * - Compute the column elimination tree on the permuted matrix
+ *
+ * - Postorder the elimination tree and the column permutation
+ *
+ */
+template <typename MatrixType, typename OrderingType>
+void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
+{
+
+ //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat.
+
+ // Firstly, copy the whole input matrix.
+ m_mat = mat;
+
+ // Compute fill-in ordering
+ OrderingType ord;
+ ord(m_mat,m_perm_c);
+
+ // Apply the permutation to the column of the input matrix
+ if (m_perm_c.size())
+ {
+ m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.
+ // Then, permute only the column pointers
+ ei_declare_aligned_stack_constructed_variable(StorageIndex,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<StorageIndex*>(mat.outerIndexPtr()):0);
+
+ // If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed.
+ if(!mat.isCompressed())
+ IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1);
+
+ // Apply the permutation and compute the nnz per column.
+ for (Index i = 0; i < mat.cols(); i++)
+ {
+ m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
+ m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
+ }
+ }
+
+ // Compute the column elimination tree of the permuted matrix
+ IndexVector firstRowElt;
+ internal::coletree(m_mat, m_etree,firstRowElt);
+
+ // In symmetric mode, do not do postorder here
+ if (!m_symmetricmode) {
+ IndexVector post, iwork;
+ // Post order etree
+ internal::treePostorder(StorageIndex(m_mat.cols()), m_etree, post);
+
+
+ // Renumber etree in postorder
+ Index m = m_mat.cols();
+ iwork.resize(m+1);
+ for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
+ m_etree = iwork;
+
+ // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree
+ PermutationType post_perm(m);
+ for (Index i = 0; i < m; i++)
+ post_perm.indices()(i) = post(i);
+
+ // Combine the two permutations : postorder the permutation for future use
+ if(m_perm_c.size()) {
+ m_perm_c = post_perm * m_perm_c;
+ }
+
+ } // end postordering
+
+ m_analysisIsOk = true;
+}
+
+// Functions needed by the numerical factorization phase
+
+
+/**
+ * - Numerical factorization
+ * - Interleaved with the symbolic factorization
+ * On exit, info is
+ *
+ * = 0: successful factorization
+ *
+ * > 0: if info = i, and i is
+ *
+ * <= A->ncol: U(i,i) is exactly zero. The factorization has
+ * been completed, but the factor U is exactly singular,
+ * and division by zero will occur if it is used to solve a
+ * system of equations.
+ *
+ * > A->ncol: number of bytes allocated when memory allocation
+ * failure occurred, plus A->ncol. If lwork = -1, it is
+ * the estimated amount of space needed, plus A->ncol.
+ */
+template <typename MatrixType, typename OrderingType>
+void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
+{
+ using internal::emptyIdxLU;
+ eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
+ eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices");
+
+ typedef typename IndexVector::Scalar StorageIndex;
+
+ m_isInitialized = true;
+
+
+ // Apply the column permutation computed in analyzepattern()
+ // m_mat = matrix * m_perm_c.inverse();
+ m_mat = matrix;
+ if (m_perm_c.size())
+ {
+ m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
+ //Then, permute only the column pointers
+ const StorageIndex * outerIndexPtr;
+ if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr();
+ else
+ {
+ StorageIndex* outerIndexPtr_t = new StorageIndex[matrix.cols()+1];
+ for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];
+ outerIndexPtr = outerIndexPtr_t;
+ }
+ for (Index i = 0; i < matrix.cols(); i++)
+ {
+ m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
+ m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
+ }
+ if(!matrix.isCompressed()) delete[] outerIndexPtr;
+ }
+ else
+ { //FIXME This should not be needed if the empty permutation is handled transparently
+ m_perm_c.resize(matrix.cols());
+ for(StorageIndex i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
+ }
+
+ Index m = m_mat.rows();
+ Index n = m_mat.cols();
+ Index nnz = m_mat.nonZeros();
+ Index maxpanel = m_perfv.panel_size * m;
+ // Allocate working storage common to the factor routines
+ Index lwork = 0;
+ Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu);
+ if (info)
+ {
+ m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
+ m_factorizationIsOk = false;
+ return ;
+ }
+
+ // Set up pointers for integer working arrays
+ IndexVector segrep(m); segrep.setZero();
+ IndexVector parent(m); parent.setZero();
+ IndexVector xplore(m); xplore.setZero();
+ IndexVector repfnz(maxpanel);
+ IndexVector panel_lsub(maxpanel);
+ IndexVector xprune(n); xprune.setZero();
+ IndexVector marker(m*internal::LUNoMarker); marker.setZero();
+
+ repfnz.setConstant(-1);
+ panel_lsub.setConstant(-1);
+
+ // Set up pointers for scalar working arrays
+ ScalarVector dense;
+ dense.setZero(maxpanel);
+ ScalarVector tempv;
+ tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
+
+ // Compute the inverse of perm_c
+ PermutationType iperm_c(m_perm_c.inverse());
+
+ // Identify initial relaxed snodes
+ IndexVector relax_end(n);
+ if ( m_symmetricmode == true )
+ Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
+ else
+ Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
+
+
+ m_perm_r.resize(m);
+ m_perm_r.indices().setConstant(-1);
+ marker.setConstant(-1);
+ m_detPermR = 1; // Record the determinant of the row permutation
+
+ m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0);
+ m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0);
+
+ // Work on one 'panel' at a time. A panel is one of the following :
+ // (a) a relaxed supernode at the bottom of the etree, or
+ // (b) panel_size contiguous columns, <panel_size> defined by the user
+ Index jcol;
+ IndexVector panel_histo(n);
+ Index pivrow; // Pivotal row number in the original row matrix
+ Index nseg1; // Number of segments in U-column above panel row jcol
+ Index nseg; // Number of segments in each U-column
+ Index irep;
+ Index i, k, jj;
+ for (jcol = 0; jcol < n; )
+ {
+ // Adjust panel size so that a panel won't overlap with the next relaxed snode.
+ Index panel_size = m_perfv.panel_size; // upper bound on panel width
+ for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
+ {
+ if (relax_end(k) != emptyIdxLU)
+ {
+ panel_size = k - jcol;
+ break;
+ }
+ }
+ if (k == n)
+ panel_size = n - jcol;
+
+ // Symbolic outer factorization on a panel of columns
+ Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
+
+ // Numeric sup-panel updates in topological order
+ Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu);
+
+ // Sparse LU within the panel, and below the panel diagonal
+ for ( jj = jcol; jj< jcol + panel_size; jj++)
+ {
+ k = (jj - jcol) * m; // Column index for w-wide arrays
+
+ nseg = nseg1; // begin after all the panel segments
+ //Depth-first-search for the current column
+ VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
+ VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
+ info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
+ if ( info )
+ {
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() ";
+ m_info = NumericalIssue;
+ m_factorizationIsOk = false;
+ return;
+ }
+ // Numeric updates to this column
+ VectorBlock<ScalarVector> dense_k(dense, k, m);
+ VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
+ info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
+ if ( info )
+ {
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() ";
+ m_info = NumericalIssue;
+ m_factorizationIsOk = false;
+ return;
+ }
+
+ // Copy the U-segments to ucol(*)
+ info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
+ if ( info )
+ {
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() ";
+ m_info = NumericalIssue;
+ m_factorizationIsOk = false;
+ return;
+ }
+
+ // Form the L-segment
+ info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
+ if ( info )
+ {
+ m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT ";
+ std::ostringstream returnInfo;
+ returnInfo << info;
+ m_lastError += returnInfo.str();
+ m_info = NumericalIssue;
+ m_factorizationIsOk = false;
+ return;
+ }
+
+ // Update the determinant of the row permutation matrix
+ // FIXME: the following test is not correct, we should probably take iperm_c into account and pivrow is not directly the row pivot.
+ if (pivrow != jj) m_detPermR = -m_detPermR;
+
+ // Prune columns (0:jj-1) using column jj
+ Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
+
+ // Reset repfnz for this column
+ for (i = 0; i < nseg; i++)
+ {
+ irep = segrep(i);
+ repfnz_k(irep) = emptyIdxLU;
+ }
+ } // end SparseLU within the panel
+ jcol += panel_size; // Move to the next panel
+ } // end for -- end elimination
+
+ m_detPermR = m_perm_r.determinant();
+ m_detPermC = m_perm_c.determinant();
+
+ // Count the number of nonzeros in factors
+ Base::countnz(n, m_nnzL, m_nnzU, m_glu);
+ // Apply permutation to the L subscripts
+ Base::fixupL(n, m_perm_r.indices(), m_glu);
+
+ // Create supernode matrix L
+ m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup);
+ // Create the column major upper sparse matrix U;
+ new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, StorageIndex> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
+
+ m_info = Success;
+ m_factorizationIsOk = true;
+}
+
+template<typename MappedSupernodalType>
+struct SparseLUMatrixLReturnType : internal::no_assignment_operator
+{
+ typedef typename MappedSupernodalType::Scalar Scalar;
+ explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
+ { }
+ Index rows() { return m_mapL.rows(); }
+ Index cols() { return m_mapL.cols(); }
+ template<typename Dest>
+ void solveInPlace( MatrixBase<Dest> &X) const
+ {
+ m_mapL.solveInPlace(X);
+ }
+ const MappedSupernodalType& m_mapL;
+};
+
+template<typename MatrixLType, typename MatrixUType>
+struct SparseLUMatrixUReturnType : internal::no_assignment_operator
+{
+ typedef typename MatrixLType::Scalar Scalar;
+ SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
+ : m_mapL(mapL),m_mapU(mapU)
+ { }
+ Index rows() { return m_mapL.rows(); }
+ Index cols() { return m_mapL.cols(); }
+
+ template<typename Dest> void solveInPlace(MatrixBase<Dest> &X) const
+ {
+ Index nrhs = X.cols();
+ Index n = X.rows();
+ // Backward solve with U
+ for (Index k = m_mapL.nsuper(); k >= 0; k--)
+ {
+ Index fsupc = m_mapL.supToCol()[k];
+ Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension
+ Index nsupc = m_mapL.supToCol()[k+1] - fsupc;
+ Index luptr = m_mapL.colIndexPtr()[fsupc];
+
+ if (nsupc == 1)
+ {
+ for (Index j = 0; j < nrhs; j++)
+ {
+ X(fsupc, j) /= m_mapL.valuePtr()[luptr];
+ }
+ }
+ else
+ {
+ Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
+ Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
+ U = A.template triangularView<Upper>().solve(U);
+ }
+
+ for (Index j = 0; j < nrhs; ++j)
+ {
+ for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)
+ {
+ typename MatrixUType::InnerIterator it(m_mapU, jcol);
+ for ( ; it; ++it)
+ {
+ Index irow = it.index();
+ X(irow, j) -= X(jcol, j) * it.value();
+ }
+ }
+ }
+ } // End For U-solve
+ }
+ const MatrixLType& m_mapL;
+ const MatrixUType& m_mapU;
+};
+
+} // End namespace Eigen
+
+#endif
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLUImpl.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLUImpl.h
new file mode 100644
index 000000000..fc0cfc4de
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLUImpl.h
@@ -0,0 +1,66 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef SPARSELU_IMPL_H
+#define SPARSELU_IMPL_H
+
+namespace Eigen {
+namespace internal {
+
+/** \ingroup SparseLU_Module
+ * \class SparseLUImpl
+ * Base class for sparseLU
+ */
+template <typename Scalar, typename StorageIndex>
+class SparseLUImpl
+{
+ public:
+ typedef Matrix<Scalar,Dynamic,1> ScalarVector;
+ typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
+ typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> ScalarMatrix;
+ typedef Map<ScalarMatrix, 0, OuterStride<> > MappedMatrixBlock;
+ typedef typename ScalarVector::RealScalar RealScalar;
+ typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;
+ typedef Ref<Matrix<StorageIndex,Dynamic,1> > BlockIndexVector;
+ typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t;
+ typedef SparseMatrix<Scalar,ColMajor,StorageIndex> MatrixType;
+
+ protected:
+ template <typename VectorType>
+ Index expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions);
+ Index memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu);
+ template <typename VectorType>
+ Index memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions);
+ void heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
+ void relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
+ Index snode_dfs(const Index jcol, const Index kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu);
+ Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu);
+ Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu);
+ template <typename Traits>
+ void dfs_kernel(const StorageIndex jj, IndexVector& perm_r,
+ Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
+ Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
+ IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits);
+ void panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
+
+ void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu);
+ Index column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
+ Index column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu);
+ Index copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu);
+ void pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu);
+ void countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu);
+ void fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu);
+
+ template<typename , typename >
+ friend struct column_dfs_traits;
+};
+
+} // end namespace internal
+} // namespace Eigen
+
+#endif
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Memory.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Memory.h
new file mode 100644
index 000000000..4dc42e87b
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Memory.h
@@ -0,0 +1,226 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU
+
+ * -- SuperLU routine (version 3.1) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * August 1, 2008
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+
+#ifndef EIGEN_SPARSELU_MEMORY
+#define EIGEN_SPARSELU_MEMORY
+
+namespace Eigen {
+namespace internal {
+
+enum { LUNoMarker = 3 };
+enum {emptyIdxLU = -1};
+inline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b)
+{
+ return (std::max)(m, (t+b)*w);
+}
+
+template< typename Scalar>
+inline Index LUTempSpace(Index&m, Index& w)
+{
+ return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar);
+}
+
+
+
+
+/**
+ * Expand the existing storage to accomodate more fill-ins
+ * \param vec Valid pointer to the vector to allocate or expand
+ * \param[in,out] length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector
+ * \param[in] nbElts Current number of elements in the factors
+ * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand
+ * \param[in,out] num_expansions Number of times the memory has been expanded
+ */
+template <typename Scalar, typename StorageIndex>
+template <typename VectorType>
+Index SparseLUImpl<Scalar,StorageIndex>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions)
+{
+
+ float alpha = 1.5; // Ratio of the memory increase
+ Index new_len; // New size of the allocated memory
+
+ if(num_expansions == 0 || keep_prev)
+ new_len = length ; // First time allocate requested
+ else
+ new_len = (std::max)(length+1,Index(alpha * length));
+
+ VectorType old_vec; // Temporary vector to hold the previous values
+ if (nbElts > 0 )
+ old_vec = vec.segment(0,nbElts);
+
+ //Allocate or expand the current vector
+#ifdef EIGEN_EXCEPTIONS
+ try
+#endif
+ {
+ vec.resize(new_len);
+ }
+#ifdef EIGEN_EXCEPTIONS
+ catch(std::bad_alloc& )
+#else
+ if(!vec.size())
+#endif
+ {
+ if (!num_expansions)
+ {
+ // First time to allocate from LUMemInit()
+ // Let LUMemInit() deals with it.
+ return -1;
+ }
+ if (keep_prev)
+ {
+ // In this case, the memory length should not not be reduced
+ return new_len;
+ }
+ else
+ {
+ // Reduce the size and increase again
+ Index tries = 0; // Number of attempts
+ do
+ {
+ alpha = (alpha + 1)/2;
+ new_len = (std::max)(length+1,Index(alpha * length));
+#ifdef EIGEN_EXCEPTIONS
+ try
+#endif
+ {
+ vec.resize(new_len);
+ }
+#ifdef EIGEN_EXCEPTIONS
+ catch(std::bad_alloc& )
+#else
+ if (!vec.size())
+#endif
+ {
+ tries += 1;
+ if ( tries > 10) return new_len;
+ }
+ } while (!vec.size());
+ }
+ }
+ //Copy the previous values to the newly allocated space
+ if (nbElts > 0)
+ vec.segment(0, nbElts) = old_vec;
+
+
+ length = new_len;
+ if(num_expansions) ++num_expansions;
+ return 0;
+}
+
+/**
+ * \brief Allocate various working space for the numerical factorization phase.
+ * \param m number of rows of the input matrix
+ * \param n number of columns
+ * \param annz number of initial nonzeros in the matrix
+ * \param lwork if lwork=-1, this routine returns an estimated size of the required memory
+ * \param glu persistent data to facilitate multiple factors : will be deleted later ??
+ * \param fillratio estimated ratio of fill in the factors
+ * \param panel_size Size of a panel
+ * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success
+ * \note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation
+ */
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu)
+{
+ Index& num_expansions = glu.num_expansions; //No memory expansions so far
+ num_expansions = 0;
+ glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U
+ glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated nnz in L factor
+ // Return the estimated size to the user if necessary
+ Index tempSpace;
+ tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
+ if (lwork == emptyIdxLU)
+ {
+ Index estimated_size;
+ estimated_size = (5 * n + 5) * sizeof(Index) + tempSpace
+ + (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n;
+ return estimated_size;
+ }
+
+ // Setup the required space
+
+ // First allocate Integer pointers for L\U factors
+ glu.xsup.resize(n+1);
+ glu.supno.resize(n+1);
+ glu.xlsub.resize(n+1);
+ glu.xlusup.resize(n+1);
+ glu.xusub.resize(n+1);
+
+ // Reserve memory for L/U factors
+ do
+ {
+ if( (expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions)<0)
+ || (expand<ScalarVector>(glu.ucol, glu.nzumax, 0, 0, num_expansions)<0)
+ || (expand<IndexVector> (glu.lsub, glu.nzlmax, 0, 0, num_expansions)<0)
+ || (expand<IndexVector> (glu.usub, glu.nzumax, 0, 1, num_expansions)<0) )
+ {
+ //Reduce the estimated size and retry
+ glu.nzlumax /= 2;
+ glu.nzumax /= 2;
+ glu.nzlmax /= 2;
+ if (glu.nzlumax < annz ) return glu.nzlumax;
+ }
+ } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size());
+
+ ++num_expansions;
+ return 0;
+
+} // end LuMemInit
+
+/**
+ * \brief Expand the existing storage
+ * \param vec vector to expand
+ * \param[in,out] maxlen On input, previous size of vec (Number of elements to copy ). on output, new size
+ * \param nbElts current number of elements in the vector.
+ * \param memtype Type of the element to expand
+ * \param num_expansions Number of expansions
+ * \return 0 on success, > 0 size of the memory allocated so far
+ */
+template <typename Scalar, typename StorageIndex>
+template <typename VectorType>
+Index SparseLUImpl<Scalar,StorageIndex>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions)
+{
+ Index failed_size;
+ if (memtype == USUB)
+ failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);
+ else
+ failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);
+
+ if (failed_size)
+ return failed_size;
+
+ return 0 ;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+#endif // EIGEN_SPARSELU_MEMORY
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Structs.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Structs.h
new file mode 100644
index 000000000..cf5ec449b
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Structs.h
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+ * NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h
+ * -- SuperLU routine (version 4.1) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November, 2010
+ *
+ * Global data structures used in LU factorization -
+ *
+ * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
+ * (xsup,supno): supno[i] is the supernode no to which i belongs;
+ * xsup(s) points to the beginning of the s-th supernode.
+ * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
+ * xsup 0 1 2 4 7 12
+ * Note: dfs will be performed on supernode rep. relative to the new
+ * row pivoting ordering
+ *
+ * (xlsub,lsub): lsub[*] contains the compressed subscript of
+ * rectangular supernodes; xlsub[j] points to the starting
+ * location of the j-th column in lsub[*]. Note that xlsub
+ * is indexed by column.
+ * Storage: original row subscripts
+ *
+ * During the course of sparse LU factorization, we also use
+ * (xlsub,lsub) for the purpose of symmetric pruning. For each
+ * supernode {s,s+1,...,t=s+r} with first column s and last
+ * column t, the subscript set
+ * lsub[j], j=xlsub[s], .., xlsub[s+1]-1
+ * is the structure of column s (i.e. structure of this supernode).
+ * It is used for the storage of numerical values.
+ * Furthermore,
+ * lsub[j], j=xlsub[t], .., xlsub[t+1]-1
+ * is the structure of the last column t of this supernode.
+ * It is for the purpose of symmetric pruning. Therefore, the
+ * structural subscripts can be rearranged without making physical
+ * interchanges among the numerical values.
+ *
+ * However, if the supernode has only one column, then we
+ * only keep one set of subscripts. For any subscript interchange
+ * performed, similar interchange must be done on the numerical
+ * values.
+ *
+ * The last column structures (for pruning) will be removed
+ * after the numercial LU factorization phase.
+ *
+ * (xlusup,lusup): lusup[*] contains the numerical values of the
+ * rectangular supernodes; xlusup[j] points to the starting
+ * location of the j-th column in storage vector lusup[*]
+ * Note: xlusup is indexed by column.
+ * Each rectangular supernode is stored by column-major
+ * scheme, consistent with Fortran 2-dim array storage.
+ *
+ * (xusub,ucol,usub): ucol[*] stores the numerical values of
+ * U-columns outside the rectangular supernodes. The row
+ * subscript of nonzero ucol[k] is stored in usub[k].
+ * xusub[i] points to the starting location of column i in ucol.
+ * Storage: new row subscripts; that is subscripts of PA.
+ */
+
+#ifndef EIGEN_LU_STRUCTS
+#define EIGEN_LU_STRUCTS
+namespace Eigen {
+namespace internal {
+
+typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType;
+
+template <typename IndexVector, typename ScalarVector>
+struct LU_GlobalLU_t {
+ typedef typename IndexVector::Scalar StorageIndex;
+ IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode
+ IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping)
+ ScalarVector lusup; // nonzero values of L ordered by columns
+ IndexVector lsub; // Compressed row indices of L rectangular supernodes.
+ IndexVector xlusup; // pointers to the beginning of each column in lusup
+ IndexVector xlsub; // pointers to the beginning of each column in lsub
+ Index nzlmax; // Current max size of lsub
+ Index nzlumax; // Current max size of lusup
+ ScalarVector ucol; // nonzero values of U ordered by columns
+ IndexVector usub; // row indices of U columns in ucol
+ IndexVector xusub; // Pointers to the beginning of each column of U in ucol
+ Index nzumax; // Current max size of ucol
+ Index n; // Number of columns in the matrix
+ Index num_expansions;
+};
+
+// Values to set for performance
+struct perfvalues {
+ Index panel_size; // a panel consists of at most <panel_size> consecutive columns
+ Index relax; // To control degree of relaxing supernodes. If the number of nodes (columns)
+ // in a subtree of the elimination tree is less than relax, this subtree is considered
+ // as one supernode regardless of the row structures of those columns
+ Index maxsuper; // The maximum size for a supernode in complete LU
+ Index rowblk; // The minimum row dimension for 2-D blocking to be used;
+ Index colblk; // The minimum column dimension for 2-D blocking to be used;
+ Index fillfactor; // The estimated fills factors for L and U, compared with A
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+#endif // EIGEN_LU_STRUCTS
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
new file mode 100644
index 000000000..721e1883b
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
@@ -0,0 +1,301 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
+#define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
+
+namespace Eigen {
+namespace internal {
+
+/** \ingroup SparseLU_Module
+ * \brief a class to manipulate the L supernodal factor from the SparseLU factorization
+ *
+ * This class contain the data to easily store
+ * and manipulate the supernodes during the factorization and solution phase of Sparse LU.
+ * Only the lower triangular matrix has supernodes.
+ *
+ * NOTE : This class corresponds to the SCformat structure in SuperLU
+ *
+ */
+/* TODO
+ * InnerIterator as for sparsematrix
+ * SuperInnerIterator to iterate through all supernodes
+ * Function for triangular solve
+ */
+template <typename _Scalar, typename _StorageIndex>
+class MappedSuperNodalMatrix
+{
+ public:
+ typedef _Scalar Scalar;
+ typedef _StorageIndex StorageIndex;
+ typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
+ typedef Matrix<Scalar,Dynamic,1> ScalarVector;
+ public:
+ MappedSuperNodalMatrix()
+ {
+
+ }
+ MappedSuperNodalMatrix(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
+ IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
+ {
+ setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col);
+ }
+
+ ~MappedSuperNodalMatrix()
+ {
+
+ }
+ /**
+ * Set appropriate pointers for the lower triangular supernodal matrix
+ * These infos are available at the end of the numerical factorization
+ * FIXME This class will be modified such that it can be use in the course
+ * of the factorization.
+ */
+ void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
+ IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
+ {
+ m_row = m;
+ m_col = n;
+ m_nzval = nzval.data();
+ m_nzval_colptr = nzval_colptr.data();
+ m_rowind = rowind.data();
+ m_rowind_colptr = rowind_colptr.data();
+ m_nsuper = col_to_sup(n);
+ m_col_to_sup = col_to_sup.data();
+ m_sup_to_col = sup_to_col.data();
+ }
+
+ /**
+ * Number of rows
+ */
+ Index rows() { return m_row; }
+
+ /**
+ * Number of columns
+ */
+ Index cols() { return m_col; }
+
+ /**
+ * Return the array of nonzero values packed by column
+ *
+ * The size is nnz
+ */
+ Scalar* valuePtr() { return m_nzval; }
+
+ const Scalar* valuePtr() const
+ {
+ return m_nzval;
+ }
+ /**
+ * Return the pointers to the beginning of each column in \ref valuePtr()
+ */
+ StorageIndex* colIndexPtr()
+ {
+ return m_nzval_colptr;
+ }
+
+ const StorageIndex* colIndexPtr() const
+ {
+ return m_nzval_colptr;
+ }
+
+ /**
+ * Return the array of compressed row indices of all supernodes
+ */
+ StorageIndex* rowIndex() { return m_rowind; }
+
+ const StorageIndex* rowIndex() const
+ {
+ return m_rowind;
+ }
+
+ /**
+ * Return the location in \em rowvaluePtr() which starts each column
+ */
+ StorageIndex* rowIndexPtr() { return m_rowind_colptr; }
+
+ const StorageIndex* rowIndexPtr() const
+ {
+ return m_rowind_colptr;
+ }
+
+ /**
+ * Return the array of column-to-supernode mapping
+ */
+ StorageIndex* colToSup() { return m_col_to_sup; }
+
+ const StorageIndex* colToSup() const
+ {
+ return m_col_to_sup;
+ }
+ /**
+ * Return the array of supernode-to-column mapping
+ */
+ StorageIndex* supToCol() { return m_sup_to_col; }
+
+ const StorageIndex* supToCol() const
+ {
+ return m_sup_to_col;
+ }
+
+ /**
+ * Return the number of supernodes
+ */
+ Index nsuper() const
+ {
+ return m_nsuper;
+ }
+
+ class InnerIterator;
+ template<typename Dest>
+ void solveInPlace( MatrixBase<Dest>&X) const;
+
+
+
+
+ protected:
+ Index m_row; // Number of rows
+ Index m_col; // Number of columns
+ Index m_nsuper; // Number of supernodes
+ Scalar* m_nzval; //array of nonzero values packed by column
+ StorageIndex* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j
+ StorageIndex* m_rowind; // Array of compressed row indices of rectangular supernodes
+ StorageIndex* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j
+ StorageIndex* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs
+ StorageIndex* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode
+
+ private :
+};
+
+/**
+ * \brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L
+ *
+ */
+template<typename Scalar, typename StorageIndex>
+class MappedSuperNodalMatrix<Scalar,StorageIndex>::InnerIterator
+{
+ public:
+ InnerIterator(const MappedSuperNodalMatrix& mat, Index outer)
+ : m_matrix(mat),
+ m_outer(outer),
+ m_supno(mat.colToSup()[outer]),
+ m_idval(mat.colIndexPtr()[outer]),
+ m_startidval(m_idval),
+ m_endidval(mat.colIndexPtr()[outer+1]),
+ m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]),
+ m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1])
+ {}
+ inline InnerIterator& operator++()
+ {
+ m_idval++;
+ m_idrow++;
+ return *this;
+ }
+ inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; }
+
+ inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]); }
+
+ inline Index index() const { return m_matrix.rowIndex()[m_idrow]; }
+ inline Index row() const { return index(); }
+ inline Index col() const { return m_outer; }
+
+ inline Index supIndex() const { return m_supno; }
+
+ inline operator bool() const
+ {
+ return ( (m_idval < m_endidval) && (m_idval >= m_startidval)
+ && (m_idrow < m_endidrow) );
+ }
+
+ protected:
+ const MappedSuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix
+ const Index m_outer; // Current column
+ const Index m_supno; // Current SuperNode number
+ Index m_idval; // Index to browse the values in the current column
+ const Index m_startidval; // Start of the column value
+ const Index m_endidval; // End of the column value
+ Index m_idrow; // Index to browse the row indices
+ Index m_endidrow; // End index of row indices of the current column
+};
+
+/**
+ * \brief Solve with the supernode triangular matrix
+ *
+ */
+template<typename Scalar, typename Index_>
+template<typename Dest>
+void MappedSuperNodalMatrix<Scalar,Index_>::solveInPlace( MatrixBase<Dest>&X) const
+{
+ /* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than Index */
+// eigen_assert(X.rows() <= NumTraits<Index>::highest());
+// eigen_assert(X.cols() <= NumTraits<Index>::highest());
+ Index n = int(X.rows());
+ Index nrhs = Index(X.cols());
+ const Scalar * Lval = valuePtr(); // Nonzero values
+ Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor> work(n, nrhs); // working vector
+ work.setZero();
+ for (Index k = 0; k <= nsuper(); k ++)
+ {
+ Index fsupc = supToCol()[k]; // First column of the current supernode
+ Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column
+ Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode
+ Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode
+ Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode
+ Index irow; //Current index row
+
+ if (nsupc == 1 )
+ {
+ for (Index j = 0; j < nrhs; j++)
+ {
+ InnerIterator it(*this, fsupc);
+ ++it; // Skip the diagonal element
+ for (; it; ++it)
+ {
+ irow = it.row();
+ X(irow, j) -= X(fsupc, j) * it.value();
+ }
+ }
+ }
+ else
+ {
+ // The supernode has more than one column
+ Index luptr = colIndexPtr()[fsupc];
+ Index lda = colIndexPtr()[fsupc+1] - luptr;
+
+ // Triangular solve
+ Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) );
+ Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
+ U = A.template triangularView<UnitLower>().solve(U);
+
+ // Matrix-vector product
+ new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
+ work.topRows(nrow).noalias() = A * U;
+
+ //Begin Scatter
+ for (Index j = 0; j < nrhs; j++)
+ {
+ Index iptr = istart + nsupc;
+ for (Index i = 0; i < nrow; i++)
+ {
+ irow = rowIndex()[iptr];
+ X(irow, j) -= work(i, j); // Scatter operation
+ work(i, j) = Scalar(0);
+ iptr++;
+ }
+ }
+ }
+ }
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPARSELU_MATRIX_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Utils.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Utils.h
new file mode 100644
index 000000000..9e3dab44d
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Utils.h
@@ -0,0 +1,80 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_SPARSELU_UTILS_H
+#define EIGEN_SPARSELU_UTILS_H
+
+namespace Eigen {
+namespace internal {
+
+/**
+ * \brief Count Nonzero elements in the factors
+ */
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu)
+{
+ nnzL = 0;
+ nnzU = (glu.xusub)(n);
+ Index nsuper = (glu.supno)(n);
+ Index jlen;
+ Index i, j, fsupc;
+ if (n <= 0 ) return;
+ // For each supernode
+ for (i = 0; i <= nsuper; i++)
+ {
+ fsupc = glu.xsup(i);
+ jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
+
+ for (j = fsupc; j < glu.xsup(i+1); j++)
+ {
+ nnzL += jlen;
+ nnzU += j - fsupc + 1;
+ jlen--;
+ }
+ }
+}
+
+/**
+ * \brief Fix up the data storage lsub for L-subscripts.
+ *
+ * It removes the subscripts sets for structural pruning,
+ * and applies permutation to the remaining subscripts
+ *
+ */
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu)
+{
+ Index fsupc, i, j, k, jstart;
+
+ StorageIndex nextl = 0;
+ Index nsuper = (glu.supno)(n);
+
+ // For each supernode
+ for (i = 0; i <= nsuper; i++)
+ {
+ fsupc = glu.xsup(i);
+ jstart = glu.xlsub(fsupc);
+ glu.xlsub(fsupc) = nextl;
+ for (j = jstart; j < glu.xlsub(fsupc + 1); j++)
+ {
+ glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A
+ nextl++;
+ }
+ for (k = fsupc+1; k < glu.xsup(i+1); k++)
+ glu.xlsub(k) = nextl; // other columns in supernode i
+ }
+
+ glu.xlsub(n) = nextl;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+#endif // EIGEN_SPARSELU_UTILS_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h
new file mode 100644
index 000000000..b57f06802
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h
@@ -0,0 +1,181 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU
+
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_COLUMN_BMOD_H
+#define SPARSELU_COLUMN_BMOD_H
+
+namespace Eigen {
+
+namespace internal {
+/**
+ * \brief Performs numeric block updates (sup-col) in topological order
+ *
+ * \param jcol current column to update
+ * \param nseg Number of segments in the U part
+ * \param dense Store the full representation of the column
+ * \param tempv working array
+ * \param segrep segment representative ...
+ * \param repfnz ??? First nonzero column in each row ??? ...
+ * \param fpanelc First column in the current panel
+ * \param glu Global LU data.
+ * \return 0 - successful return
+ * > 0 - number of bytes allocated when run out of space
+ *
+ */
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv,
+ BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu)
+{
+ Index jsupno, k, ksub, krep, ksupno;
+ Index lptr, nrow, isub, irow, nextlu, new_next, ufirst;
+ Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros;
+ /* krep = representative of current k-th supernode
+ * fsupc = first supernodal column
+ * nsupc = number of columns in a supernode
+ * nsupr = number of rows in a supernode
+ * luptr = location of supernodal LU-block in storage
+ * kfnz = first nonz in the k-th supernodal segment
+ * no_zeros = no lf leading zeros in a supernodal U-segment
+ */
+
+ jsupno = glu.supno(jcol);
+ // For each nonzero supernode segment of U[*,j] in topological order
+ k = nseg - 1;
+ Index d_fsupc; // distance between the first column of the current panel and the
+ // first column of the current snode
+ Index fst_col; // First column within small LU update
+ Index segsize;
+ for (ksub = 0; ksub < nseg; ksub++)
+ {
+ krep = segrep(k); k--;
+ ksupno = glu.supno(krep);
+ if (jsupno != ksupno )
+ {
+ // outside the rectangular supernode
+ fsupc = glu.xsup(ksupno);
+ fst_col = (std::max)(fsupc, fpanelc);
+
+ // Distance from the current supernode to the current panel;
+ // d_fsupc = 0 if fsupc > fpanelc
+ d_fsupc = fst_col - fsupc;
+
+ luptr = glu.xlusup(fst_col) + d_fsupc;
+ lptr = glu.xlsub(fsupc) + d_fsupc;
+
+ kfnz = repfnz(krep);
+ kfnz = (std::max)(kfnz, fpanelc);
+
+ segsize = krep - kfnz + 1;
+ nsupc = krep - fst_col + 1;
+ nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
+ nrow = nsupr - d_fsupc - nsupc;
+ Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);
+
+
+ // Perform a triangular solver and block update,
+ // then scatter the result of sup-col update to dense
+ no_zeros = kfnz - fst_col;
+ if(segsize==1)
+ LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
+ else
+ LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
+ } // end if jsupno
+ } // end for each segment
+
+ // Process the supernodal portion of L\U[*,j]
+ nextlu = glu.xlusup(jcol);
+ fsupc = glu.xsup(jsupno);
+
+ // copy the SPA dense into L\U[*,j]
+ Index mem;
+ new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc);
+ Index offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;
+ if(offset)
+ new_next += offset;
+ while (new_next > glu.nzlumax )
+ {
+ mem = memXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
+ if (mem) return mem;
+ }
+
+ for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
+ {
+ irow = glu.lsub(isub);
+ glu.lusup(nextlu) = dense(irow);
+ dense(irow) = Scalar(0.0);
+ ++nextlu;
+ }
+
+ if(offset)
+ {
+ glu.lusup.segment(nextlu,offset).setZero();
+ nextlu += offset;
+ }
+ glu.xlusup(jcol + 1) = StorageIndex(nextlu); // close L\U(*,jcol);
+
+ /* For more updates within the panel (also within the current supernode),
+ * should start from the first column of the panel, or the first column
+ * of the supernode, whichever is bigger. There are two cases:
+ * 1) fsupc < fpanelc, then fst_col <-- fpanelc
+ * 2) fsupc >= fpanelc, then fst_col <-- fsupc
+ */
+ fst_col = (std::max)(fsupc, fpanelc);
+
+ if (fst_col < jcol)
+ {
+ // Distance between the current supernode and the current panel
+ // d_fsupc = 0 if fsupc >= fpanelc
+ d_fsupc = fst_col - fsupc;
+
+ lptr = glu.xlsub(fsupc) + d_fsupc;
+ luptr = glu.xlusup(fst_col) + d_fsupc;
+ nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension
+ nsupc = jcol - fst_col; // excluding jcol
+ nrow = nsupr - d_fsupc - nsupc;
+
+ // points to the beginning of jcol in snode L\U(jsupno)
+ ufirst = glu.xlusup(jcol) + d_fsupc;
+ Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
+ MappedMatrixBlock A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
+ VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
+ u = A.template triangularView<UnitLower>().solve(u);
+
+ new (&A) MappedMatrixBlock ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
+ VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
+ l.noalias() -= A * u;
+
+ } // End if fst_col
+ return 0;
+}
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // SPARSELU_COLUMN_BMOD_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h
new file mode 100644
index 000000000..c98b30e32
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h
@@ -0,0 +1,179 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU
+
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_COLUMN_DFS_H
+#define SPARSELU_COLUMN_DFS_H
+
+template <typename Scalar, typename StorageIndex> class SparseLUImpl;
+namespace Eigen {
+
+namespace internal {
+
+template<typename IndexVector, typename ScalarVector>
+struct column_dfs_traits : no_assignment_operator
+{
+ typedef typename ScalarVector::Scalar Scalar;
+ typedef typename IndexVector::Scalar StorageIndex;
+ column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& glu, SparseLUImpl<Scalar, StorageIndex>& luImpl)
+ : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl)
+ {}
+ bool update_segrep(Index /*krep*/, Index /*jj*/)
+ {
+ return true;
+ }
+ void mem_expand(IndexVector& lsub, Index& nextl, Index chmark)
+ {
+ if (nextl >= m_glu.nzlmax)
+ m_luImpl.memXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
+ if (chmark != (m_jcol-1)) m_jsuper_ref = emptyIdxLU;
+ }
+ enum { ExpandMem = true };
+
+ Index m_jcol;
+ Index& m_jsuper_ref;
+ typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& m_glu;
+ SparseLUImpl<Scalar, StorageIndex>& m_luImpl;
+};
+
+
+/**
+ * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary
+ *
+ * A supernode representative is the last column of a supernode.
+ * The nonzeros in U[*,j] are segments that end at supernodes representatives.
+ * The routine returns a list of the supernodal representatives
+ * in topological order of the dfs that generates them.
+ * The location of the first nonzero in each supernodal segment
+ * (supernodal entry location) is also returned.
+ *
+ * \param m number of rows in the matrix
+ * \param jcol Current column
+ * \param perm_r Row permutation
+ * \param maxsuper Maximum number of column allowed in a supernode
+ * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended
+ * \param lsub_col defines the rhs vector to start the dfs
+ * \param [in,out] segrep Segment representatives - new segments appended
+ * \param repfnz First nonzero location in each row
+ * \param xprune
+ * \param marker marker[i] == jj, if i was visited during dfs of current column jj;
+ * \param parent
+ * \param xplore working array
+ * \param glu global LU data
+ * \return 0 success
+ * > 0 number of bytes allocated when run out of space
+ *
+ */
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg,
+ BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune,
+ IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
+{
+
+ Index jsuper = glu.supno(jcol);
+ Index nextl = glu.xlsub(jcol);
+ VectorBlock<IndexVector> marker2(marker, 2*m, m);
+
+
+ column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu, *this);
+
+ // For each nonzero in A(*,jcol) do dfs
+ for (Index k = 0; ((k < m) ? lsub_col[k] != emptyIdxLU : false) ; k++)
+ {
+ Index krow = lsub_col(k);
+ lsub_col(k) = emptyIdxLU;
+ Index kmark = marker2(krow);
+
+ // krow was visited before, go to the next nonz;
+ if (kmark == jcol) continue;
+
+ dfs_kernel(StorageIndex(jcol), perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
+ xplore, glu, nextl, krow, traits);
+ } // for each nonzero ...
+
+ Index fsupc;
+ StorageIndex nsuper = glu.supno(jcol);
+ StorageIndex jcolp1 = StorageIndex(jcol) + 1;
+ Index jcolm1 = jcol - 1;
+
+ // check to see if j belongs in the same supernode as j-1
+ if ( jcol == 0 )
+ { // Do nothing for column 0
+ nsuper = glu.supno(0) = 0 ;
+ }
+ else
+ {
+ fsupc = glu.xsup(nsuper);
+ StorageIndex jptr = glu.xlsub(jcol); // Not yet compressed
+ StorageIndex jm1ptr = glu.xlsub(jcolm1);
+
+ // Use supernodes of type T2 : see SuperLU paper
+ if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU;
+
+ // Make sure the number of columns in a supernode doesn't
+ // exceed threshold
+ if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU;
+
+ /* If jcol starts a new supernode, reclaim storage space in
+ * glu.lsub from previous supernode. Note we only store
+ * the subscript set of the first and last columns of
+ * a supernode. (first for num values, last for pruning)
+ */
+ if (jsuper == emptyIdxLU)
+ { // starts a new supernode
+ if ( (fsupc < jcolm1-1) )
+ { // >= 3 columns in nsuper
+ StorageIndex ito = glu.xlsub(fsupc+1);
+ glu.xlsub(jcolm1) = ito;
+ StorageIndex istop = ito + jptr - jm1ptr;
+ xprune(jcolm1) = istop; // intialize xprune(jcol-1)
+ glu.xlsub(jcol) = istop;
+
+ for (StorageIndex ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
+ glu.lsub(ito) = glu.lsub(ifrom);
+ nextl = ito; // = istop + length(jcol)
+ }
+ nsuper++;
+ glu.supno(jcol) = nsuper;
+ } // if a new supernode
+ } // end else: jcol > 0
+
+ // Tidy up the pointers before exit
+ glu.xsup(nsuper+1) = jcolp1;
+ glu.supno(jcolp1) = nsuper;
+ xprune(jcol) = StorageIndex(nextl); // Intialize upper bound for pruning
+ glu.xlsub(jcolp1) = StorageIndex(nextl);
+
+ return 0;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
new file mode 100644
index 000000000..c32d8d8b1
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
@@ -0,0 +1,107 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+/*
+
+ * NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU
+
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_COPY_TO_UCOL_H
+#define SPARSELU_COPY_TO_UCOL_H
+
+namespace Eigen {
+namespace internal {
+
+/**
+ * \brief Performs numeric block updates (sup-col) in topological order
+ *
+ * \param jcol current column to update
+ * \param nseg Number of segments in the U part
+ * \param segrep segment representative ...
+ * \param repfnz First nonzero column in each row ...
+ * \param perm_r Row permutation
+ * \param dense Store the full representation of the column
+ * \param glu Global LU data.
+ * \return 0 - successful return
+ * > 0 - number of bytes allocated when run out of space
+ *
+ */
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep,
+ BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
+{
+ Index ksub, krep, ksupno;
+
+ Index jsupno = glu.supno(jcol);
+
+ // For each nonzero supernode segment of U[*,j] in topological order
+ Index k = nseg - 1, i;
+ StorageIndex nextu = glu.xusub(jcol);
+ Index kfnz, isub, segsize;
+ Index new_next,irow;
+ Index fsupc, mem;
+ for (ksub = 0; ksub < nseg; ksub++)
+ {
+ krep = segrep(k); k--;
+ ksupno = glu.supno(krep);
+ if (jsupno != ksupno ) // should go into ucol();
+ {
+ kfnz = repfnz(krep);
+ if (kfnz != emptyIdxLU)
+ { // Nonzero U-segment
+ fsupc = glu.xsup(ksupno);
+ isub = glu.xlsub(fsupc) + kfnz - fsupc;
+ segsize = krep - kfnz + 1;
+ new_next = nextu + segsize;
+ while (new_next > glu.nzumax)
+ {
+ mem = memXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
+ if (mem) return mem;
+ mem = memXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
+ if (mem) return mem;
+
+ }
+
+ for (i = 0; i < segsize; i++)
+ {
+ irow = glu.lsub(isub);
+ glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order
+ glu.ucol(nextu) = dense(irow);
+ dense(irow) = Scalar(0.0);
+ nextu++;
+ isub++;
+ }
+
+ } // end nonzero U-segment
+
+ } // end if jsupno
+
+ } // end for each segment
+ glu.xusub(jcol + 1) = nextu; // close U(*,jcol)
+ return 0;
+}
+
+} // namespace internal
+} // end namespace Eigen
+
+#endif // SPARSELU_COPY_TO_UCOL_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
new file mode 100644
index 000000000..95ba7413f
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
@@ -0,0 +1,280 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H
+#define EIGEN_SPARSELU_GEMM_KERNEL_H
+
+namespace Eigen {
+
+namespace internal {
+
+
+/** \internal
+ * A general matrix-matrix product kernel optimized for the SparseLU factorization.
+ * - A, B, and C must be column major
+ * - lda and ldc must be multiples of the respective packet size
+ * - C must have the same alignment as A
+ */
+template<typename Scalar>
+EIGEN_DONT_INLINE
+void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
+{
+ using namespace Eigen::internal;
+
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum {
+ NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+ PacketSize = packet_traits<Scalar>::size,
+ PM = 8, // peeling in M
+ RN = 2, // register blocking
+ RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking
+ BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk
+ SM = PM*PacketSize // step along M
+ };
+ Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
+ Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
+ Index i0 = internal::first_default_aligned(A,m);
+
+ eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));
+
+ // handle the non aligned rows of A and C without any optimization:
+ for(Index i=0; i<i0; ++i)
+ {
+ for(Index j=0; j<n; ++j)
+ {
+ Scalar c = C[i+j*ldc];
+ for(Index k=0; k<d; ++k)
+ c += B[k+j*ldb] * A[i+k*lda];
+ C[i+j*ldc] = c;
+ }
+ }
+ // process the remaining rows per chunk of BM rows
+ for(Index ib=i0; ib<m; ib+=BM)
+ {
+ Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows
+ Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
+ Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
+
+ // Let's process two columns of B-C at once
+ for(Index j=0; j<n_end; j+=RN)
+ {
+ const Scalar* Bc0 = B+(j+0)*ldb;
+ const Scalar* Bc1 = B+(j+1)*ldb;
+
+ for(Index k=0; k<d_end; k+=RK)
+ {
+
+ // load and expand a RN x RK block of B
+ Packet b00, b10, b20, b30, b01, b11, b21, b31;
+ { b00 = pset1<Packet>(Bc0[0]); }
+ { b10 = pset1<Packet>(Bc0[1]); }
+ if(RK==4) { b20 = pset1<Packet>(Bc0[2]); }
+ if(RK==4) { b30 = pset1<Packet>(Bc0[3]); }
+ { b01 = pset1<Packet>(Bc1[0]); }
+ { b11 = pset1<Packet>(Bc1[1]); }
+ if(RK==4) { b21 = pset1<Packet>(Bc1[2]); }
+ if(RK==4) { b31 = pset1<Packet>(Bc1[3]); }
+
+ Packet a0, a1, a2, a3, c0, c1, t0, t1;
+
+ const Scalar* A0 = A+ib+(k+0)*lda;
+ const Scalar* A1 = A+ib+(k+1)*lda;
+ const Scalar* A2 = A+ib+(k+2)*lda;
+ const Scalar* A3 = A+ib+(k+3)*lda;
+
+ Scalar* C0 = C+ib+(j+0)*ldc;
+ Scalar* C1 = C+ib+(j+1)*ldc;
+
+ a0 = pload<Packet>(A0);
+ a1 = pload<Packet>(A1);
+ if(RK==4)
+ {
+ a2 = pload<Packet>(A2);
+ a3 = pload<Packet>(A3);
+ }
+ else
+ {
+ // workaround "may be used uninitialized in this function" warning
+ a2 = a3 = a0;
+ }
+
+#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
+#define WORK(I) \
+ c0 = pload<Packet>(C0+i+(I)*PacketSize); \
+ c1 = pload<Packet>(C1+i+(I)*PacketSize); \
+ KMADD(c0, a0, b00, t0) \
+ KMADD(c1, a0, b01, t1) \
+ a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
+ KMADD(c0, a1, b10, t0) \
+ KMADD(c1, a1, b11, t1) \
+ a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
+ if(RK==4){ KMADD(c0, a2, b20, t0) }\
+ if(RK==4){ KMADD(c1, a2, b21, t1) }\
+ if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
+ if(RK==4){ KMADD(c0, a3, b30, t0) }\
+ if(RK==4){ KMADD(c1, a3, b31, t1) }\
+ if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
+ pstore(C0+i+(I)*PacketSize, c0); \
+ pstore(C1+i+(I)*PacketSize, c1)
+
+ // process rows of A' - C' with aggressive vectorization and peeling
+ for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
+ {
+ EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1");
+ prefetch((A0+i+(5)*PacketSize));
+ prefetch((A1+i+(5)*PacketSize));
+ if(RK==4) prefetch((A2+i+(5)*PacketSize));
+ if(RK==4) prefetch((A3+i+(5)*PacketSize));
+
+ WORK(0);
+ WORK(1);
+ WORK(2);
+ WORK(3);
+ WORK(4);
+ WORK(5);
+ WORK(6);
+ WORK(7);
+ }
+ // process the remaining rows with vectorization only
+ for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
+ {
+ WORK(0);
+ }
+#undef WORK
+ // process the remaining rows without vectorization
+ for(Index i=actual_b_end2; i<actual_b; ++i)
+ {
+ if(RK==4)
+ {
+ C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
+ C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3];
+ }
+ else
+ {
+ C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
+ C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1];
+ }
+ }
+
+ Bc0 += RK;
+ Bc1 += RK;
+ } // peeled loop on k
+ } // peeled loop on the columns j
+ // process the last column (we now perform a matrix-vector product)
+ if((n-n_end)>0)
+ {
+ const Scalar* Bc0 = B+(n-1)*ldb;
+
+ for(Index k=0; k<d_end; k+=RK)
+ {
+
+ // load and expand a 1 x RK block of B
+ Packet b00, b10, b20, b30;
+ b00 = pset1<Packet>(Bc0[0]);
+ b10 = pset1<Packet>(Bc0[1]);
+ if(RK==4) b20 = pset1<Packet>(Bc0[2]);
+ if(RK==4) b30 = pset1<Packet>(Bc0[3]);
+
+ Packet a0, a1, a2, a3, c0, t0/*, t1*/;
+
+ const Scalar* A0 = A+ib+(k+0)*lda;
+ const Scalar* A1 = A+ib+(k+1)*lda;
+ const Scalar* A2 = A+ib+(k+2)*lda;
+ const Scalar* A3 = A+ib+(k+3)*lda;
+
+ Scalar* C0 = C+ib+(n_end)*ldc;
+
+ a0 = pload<Packet>(A0);
+ a1 = pload<Packet>(A1);
+ if(RK==4)
+ {
+ a2 = pload<Packet>(A2);
+ a3 = pload<Packet>(A3);
+ }
+ else
+ {
+ // workaround "may be used uninitialized in this function" warning
+ a2 = a3 = a0;
+ }
+
+#define WORK(I) \
+ c0 = pload<Packet>(C0+i+(I)*PacketSize); \
+ KMADD(c0, a0, b00, t0) \
+ a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
+ KMADD(c0, a1, b10, t0) \
+ a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
+ if(RK==4){ KMADD(c0, a2, b20, t0) }\
+ if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
+ if(RK==4){ KMADD(c0, a3, b30, t0) }\
+ if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
+ pstore(C0+i+(I)*PacketSize, c0);
+
+ // agressive vectorization and peeling
+ for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
+ {
+ EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2");
+ WORK(0);
+ WORK(1);
+ WORK(2);
+ WORK(3);
+ WORK(4);
+ WORK(5);
+ WORK(6);
+ WORK(7);
+ }
+ // vectorization only
+ for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
+ {
+ WORK(0);
+ }
+ // remaining scalars
+ for(Index i=actual_b_end2; i<actual_b; ++i)
+ {
+ if(RK==4)
+ C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
+ else
+ C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
+ }
+
+ Bc0 += RK;
+#undef WORK
+ }
+ }
+
+ // process the last columns of A, corresponding to the last rows of B
+ Index rd = d-d_end;
+ if(rd>0)
+ {
+ for(Index j=0; j<n; ++j)
+ {
+ enum {
+ Alignment = PacketSize>1 ? Aligned : 0
+ };
+ typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector;
+ typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector;
+ if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b);
+
+ else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
+ + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b);
+
+ else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
+ + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b)
+ + B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b);
+ }
+ }
+
+ } // blocking on the rows of A and C
+}
+#undef KMADD
+
+} // namespace internal
+
+} // namespace Eigen
+
+#endif // EIGEN_SPARSELU_GEMM_KERNEL_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
new file mode 100644
index 000000000..6f75d500e
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
@@ -0,0 +1,126 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* This file is a modified version of heap_relax_snode.c file in SuperLU
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+
+#ifndef SPARSELU_HEAP_RELAX_SNODE_H
+#define SPARSELU_HEAP_RELAX_SNODE_H
+
+namespace Eigen {
+namespace internal {
+
+/**
+ * \brief Identify the initial relaxed supernodes
+ *
+ * This routine applied to a symmetric elimination tree.
+ * It assumes that the matrix has been reordered according to the postorder of the etree
+ * \param n The number of columns
+ * \param et elimination tree
+ * \param relax_columns Maximum number of columns allowed in a relaxed snode
+ * \param descendants Number of descendants of each node in the etree
+ * \param relax_end last column in a supernode
+ */
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
+{
+
+ // The etree may not be postordered, but its heap ordered
+ IndexVector post;
+ internal::treePostorder(StorageIndex(n), et, post); // Post order etree
+ IndexVector inv_post(n+1);
+ for (StorageIndex i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
+
+ // Renumber etree in postorder
+ IndexVector iwork(n);
+ IndexVector et_save(n+1);
+ for (Index i = 0; i < n; ++i)
+ {
+ iwork(post(i)) = post(et(i));
+ }
+ et_save = et; // Save the original etree
+ et = iwork;
+
+ // compute the number of descendants of each node in the etree
+ relax_end.setConstant(emptyIdxLU);
+ Index j, parent;
+ descendants.setZero();
+ for (j = 0; j < n; j++)
+ {
+ parent = et(j);
+ if (parent != n) // not the dummy root
+ descendants(parent) += descendants(j) + 1;
+ }
+ // Identify the relaxed supernodes by postorder traversal of the etree
+ Index snode_start; // beginning of a snode
+ StorageIndex k;
+ Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
+ Index nsuper_et = 0; // Number of relaxed snodes in the original etree
+ StorageIndex l;
+ for (j = 0; j < n; )
+ {
+ parent = et(j);
+ snode_start = j;
+ while ( parent != n && descendants(parent) < relax_columns )
+ {
+ j = parent;
+ parent = et(j);
+ }
+ // Found a supernode in postordered etree, j is the last column
+ ++nsuper_et_post;
+ k = StorageIndex(n);
+ for (Index i = snode_start; i <= j; ++i)
+ k = (std::min)(k, inv_post(i));
+ l = inv_post(j);
+ if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode
+ {
+ // This is also a supernode in the original etree
+ relax_end(k) = l; // Record last column
+ ++nsuper_et;
+ }
+ else
+ {
+ for (Index i = snode_start; i <= j; ++i)
+ {
+ l = inv_post(i);
+ if (descendants(i) == 0)
+ {
+ relax_end(l) = l;
+ ++nsuper_et;
+ }
+ }
+ }
+ j++;
+ // Search for a new leaf
+ while (descendants(j) != 0 && j < n) j++;
+ } // End postorder traversal of the etree
+
+ // Recover the original etree
+ et = et_save;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+#endif // SPARSELU_HEAP_RELAX_SNODE_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
new file mode 100644
index 000000000..8c1b3e8bc
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -0,0 +1,130 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef SPARSELU_KERNEL_BMOD_H
+#define SPARSELU_KERNEL_BMOD_H
+
+namespace Eigen {
+namespace internal {
+
+template <int SegSizeAtCompileTime> struct LU_kernel_bmod
+{
+ /** \internal
+ * \brief Performs numeric block updates from a given supernode to a single column
+ *
+ * \param segsize Size of the segment (and blocks ) to use for updates
+ * \param[in,out] dense Packed values of the original matrix
+ * \param tempv temporary vector to use for updates
+ * \param lusup array containing the supernodes
+ * \param lda Leading dimension in the supernode
+ * \param nrow Number of rows in the rectangular part of the supernode
+ * \param lsub compressed row subscripts of supernodes
+ * \param lptr pointer to the first column of the current supernode in lsub
+ * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
+ */
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+ static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+ const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
+};
+
+template <int SegSizeAtCompileTime>
+template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+ const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
+{
+ typedef typename ScalarVector::Scalar Scalar;
+ // First, copy U[*,j] segment from dense(*) to tempv(*)
+ // The result of triangular solve is in tempv[*];
+ // The result of matric-vector update is in dense[*]
+ Index isub = lptr + no_zeros;
+ Index i;
+ Index irow;
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
+ {
+ irow = lsub(isub);
+ tempv(i) = dense(irow);
+ ++isub;
+ }
+ // Dense triangular solve -- start effective triangle
+ luptr += lda * no_zeros + no_zeros;
+ // Form Eigen matrix and vector
+ Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
+ Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
+
+ u = A.template triangularView<UnitLower>().solve(u);
+
+ // Dense matrix-vector product y <-- B*x
+ luptr += segsize;
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
+ Index ldl = internal::first_multiple(nrow, PacketSize);
+ Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
+ Index aligned_offset = internal::first_default_aligned(tempv.data()+segsize, PacketSize);
+ Index aligned_with_B_offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize))%PacketSize;
+ Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
+
+ l.setZero();
+ internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
+
+ // Scatter tempv[] into SPA dense[] as a temporary storage
+ isub = lptr + no_zeros;
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
+ {
+ irow = lsub(isub++);
+ dense(irow) = tempv(i);
+ }
+
+ // Scatter l into SPA dense[]
+ for (i = 0; i < nrow; i++)
+ {
+ irow = lsub(isub++);
+ dense(irow) -= l(i);
+ }
+}
+
+template <> struct LU_kernel_bmod<1>
+{
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+ static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+ const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
+};
+
+
+template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+ const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
+{
+ typedef typename ScalarVector::Scalar Scalar;
+ typedef typename IndexVector::Scalar StorageIndex;
+ Scalar f = dense(lsub(lptr + no_zeros));
+ luptr += lda * no_zeros + no_zeros + 1;
+ const Scalar* a(lusup.data() + luptr);
+ const StorageIndex* irow(lsub.data()+lptr + no_zeros + 1);
+ Index i = 0;
+ for (; i+1 < nrow; i+=2)
+ {
+ Index i0 = *(irow++);
+ Index i1 = *(irow++);
+ Scalar a0 = *(a++);
+ Scalar a1 = *(a++);
+ Scalar d0 = dense.coeff(i0);
+ Scalar d1 = dense.coeff(i1);
+ d0 -= f*a0;
+ d1 -= f*a1;
+ dense.coeffRef(i0) = d0;
+ dense.coeffRef(i1) = d1;
+ }
+ if(i<nrow)
+ dense.coeffRef(*(irow++)) -= f * *(a++);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+#endif // SPARSELU_KERNEL_BMOD_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h
new file mode 100644
index 000000000..822cf32c3
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h
@@ -0,0 +1,223 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU
+
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_PANEL_BMOD_H
+#define SPARSELU_PANEL_BMOD_H
+
+namespace Eigen {
+namespace internal {
+
+/**
+ * \brief Performs numeric block updates (sup-panel) in topological order.
+ *
+ * Before entering this routine, the original nonzeros in the panel
+ * were already copied i nto the spa[m,w]
+ *
+ * \param m number of rows in the matrix
+ * \param w Panel size
+ * \param jcol Starting column of the panel
+ * \param nseg Number of segments in the U part
+ * \param dense Store the full representation of the panel
+ * \param tempv working array
+ * \param segrep segment representative... first row in the segment
+ * \param repfnz First nonzero rows
+ * \param glu Global LU data.
+ *
+ *
+ */
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w, const Index jcol,
+ const Index nseg, ScalarVector& dense, ScalarVector& tempv,
+ IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
+{
+
+ Index ksub,jj,nextl_col;
+ Index fsupc, nsupc, nsupr, nrow;
+ Index krep, kfnz;
+ Index lptr; // points to the row subscripts of a supernode
+ Index luptr; // ...
+ Index segsize,no_zeros ;
+ // For each nonz supernode segment of U[*,j] in topological order
+ Index k = nseg - 1;
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
+
+ for (ksub = 0; ksub < nseg; ksub++)
+ { // For each updating supernode
+ /* krep = representative of current k-th supernode
+ * fsupc = first supernodal column
+ * nsupc = number of columns in a supernode
+ * nsupr = number of rows in a supernode
+ */
+ krep = segrep(k); k--;
+ fsupc = glu.xsup(glu.supno(krep));
+ nsupc = krep - fsupc + 1;
+ nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
+ nrow = nsupr - nsupc;
+ lptr = glu.xlsub(fsupc);
+
+ // loop over the panel columns to detect the actual number of columns and rows
+ Index u_rows = 0;
+ Index u_cols = 0;
+ for (jj = jcol; jj < jcol + w; jj++)
+ {
+ nextl_col = (jj-jcol) * m;
+ VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
+
+ kfnz = repfnz_col(krep);
+ if ( kfnz == emptyIdxLU )
+ continue; // skip any zero segment
+
+ segsize = krep - kfnz + 1;
+ u_cols++;
+ u_rows = (std::max)(segsize,u_rows);
+ }
+
+ if(nsupc >= 2)
+ {
+ Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
+ Map<ScalarMatrix, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
+
+ // gather U
+ Index u_col = 0;
+ for (jj = jcol; jj < jcol + w; jj++)
+ {
+ nextl_col = (jj-jcol) * m;
+ VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
+ VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
+
+ kfnz = repfnz_col(krep);
+ if ( kfnz == emptyIdxLU )
+ continue; // skip any zero segment
+
+ segsize = krep - kfnz + 1;
+ luptr = glu.xlusup(fsupc);
+ no_zeros = kfnz - fsupc;
+
+ Index isub = lptr + no_zeros;
+ Index off = u_rows-segsize;
+ for (Index i = 0; i < off; i++) U(i,u_col) = 0;
+ for (Index i = 0; i < segsize; i++)
+ {
+ Index irow = glu.lsub(isub);
+ U(i+off,u_col) = dense_col(irow);
+ ++isub;
+ }
+ u_col++;
+ }
+ // solve U = A^-1 U
+ luptr = glu.xlusup(fsupc);
+ Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
+ no_zeros = (krep - u_rows + 1) - fsupc;
+ luptr += lda * no_zeros + no_zeros;
+ MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
+ U = A.template triangularView<UnitLower>().solve(U);
+
+ // update
+ luptr += u_rows;
+ MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
+ eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
+
+ Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
+ Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
+ MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
+
+ L.setZero();
+ internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());
+
+ // scatter U and L
+ u_col = 0;
+ for (jj = jcol; jj < jcol + w; jj++)
+ {
+ nextl_col = (jj-jcol) * m;
+ VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
+ VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
+
+ kfnz = repfnz_col(krep);
+ if ( kfnz == emptyIdxLU )
+ continue; // skip any zero segment
+
+ segsize = krep - kfnz + 1;
+ no_zeros = kfnz - fsupc;
+ Index isub = lptr + no_zeros;
+
+ Index off = u_rows-segsize;
+ for (Index i = 0; i < segsize; i++)
+ {
+ Index irow = glu.lsub(isub++);
+ dense_col(irow) = U.coeff(i+off,u_col);
+ U.coeffRef(i+off,u_col) = 0;
+ }
+
+ // Scatter l into SPA dense[]
+ for (Index i = 0; i < nrow; i++)
+ {
+ Index irow = glu.lsub(isub++);
+ dense_col(irow) -= L.coeff(i,u_col);
+ L.coeffRef(i,u_col) = 0;
+ }
+ u_col++;
+ }
+ }
+ else // level 2 only
+ {
+ // Sequence through each column in the panel
+ for (jj = jcol; jj < jcol + w; jj++)
+ {
+ nextl_col = (jj-jcol) * m;
+ VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
+ VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
+
+ kfnz = repfnz_col(krep);
+ if ( kfnz == emptyIdxLU )
+ continue; // skip any zero segment
+
+ segsize = krep - kfnz + 1;
+ luptr = glu.xlusup(fsupc);
+
+ Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
+
+ // Perform a trianglar solve and block update,
+ // then scatter the result of sup-col update to dense[]
+ no_zeros = kfnz - fsupc;
+ if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
+ else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
+ else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
+ else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
+ } // End for each column in the panel
+ }
+
+ } // End for each updating supernode
+} // end panel bmod
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // SPARSELU_PANEL_BMOD_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h
new file mode 100644
index 000000000..155df7336
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h
@@ -0,0 +1,258 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU
+
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_PANEL_DFS_H
+#define SPARSELU_PANEL_DFS_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename IndexVector>
+struct panel_dfs_traits
+{
+ typedef typename IndexVector::Scalar StorageIndex;
+ panel_dfs_traits(Index jcol, StorageIndex* marker)
+ : m_jcol(jcol), m_marker(marker)
+ {}
+ bool update_segrep(Index krep, StorageIndex jj)
+ {
+ if(m_marker[krep]<m_jcol)
+ {
+ m_marker[krep] = jj;
+ return true;
+ }
+ return false;
+ }
+ void mem_expand(IndexVector& /*glu.lsub*/, Index /*nextl*/, Index /*chmark*/) {}
+ enum { ExpandMem = false };
+ Index m_jcol;
+ StorageIndex* m_marker;
+};
+
+
+template <typename Scalar, typename StorageIndex>
+template <typename Traits>
+void SparseLUImpl<Scalar,StorageIndex>::dfs_kernel(const StorageIndex jj, IndexVector& perm_r,
+ Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
+ Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
+ IndexVector& xplore, GlobalLU_t& glu,
+ Index& nextl_col, Index krow, Traits& traits
+ )
+{
+
+ StorageIndex kmark = marker(krow);
+
+ // For each unmarked krow of jj
+ marker(krow) = jj;
+ StorageIndex kperm = perm_r(krow);
+ if (kperm == emptyIdxLU ) {
+ // krow is in L : place it in structure of L(*, jj)
+ panel_lsub(nextl_col++) = StorageIndex(krow); // krow is indexed into A
+
+ traits.mem_expand(panel_lsub, nextl_col, kmark);
+ }
+ else
+ {
+ // krow is in U : if its supernode-representative krep
+ // has been explored, update repfnz(*)
+ // krep = supernode representative of the current row
+ StorageIndex krep = glu.xsup(glu.supno(kperm)+1) - 1;
+ // First nonzero element in the current column:
+ StorageIndex myfnz = repfnz_col(krep);
+
+ if (myfnz != emptyIdxLU )
+ {
+ // Representative visited before
+ if (myfnz > kperm ) repfnz_col(krep) = kperm;
+
+ }
+ else
+ {
+ // Otherwise, perform dfs starting at krep
+ StorageIndex oldrep = emptyIdxLU;
+ parent(krep) = oldrep;
+ repfnz_col(krep) = kperm;
+ StorageIndex xdfs = glu.xlsub(krep);
+ Index maxdfs = xprune(krep);
+
+ StorageIndex kpar;
+ do
+ {
+ // For each unmarked kchild of krep
+ while (xdfs < maxdfs)
+ {
+ StorageIndex kchild = glu.lsub(xdfs);
+ xdfs++;
+ StorageIndex chmark = marker(kchild);
+
+ if (chmark != jj )
+ {
+ marker(kchild) = jj;
+ StorageIndex chperm = perm_r(kchild);
+
+ if (chperm == emptyIdxLU)
+ {
+ // case kchild is in L: place it in L(*, j)
+ panel_lsub(nextl_col++) = kchild;
+ traits.mem_expand(panel_lsub, nextl_col, chmark);
+ }
+ else
+ {
+ // case kchild is in U :
+ // chrep = its supernode-rep. If its rep has been explored,
+ // update its repfnz(*)
+ StorageIndex chrep = glu.xsup(glu.supno(chperm)+1) - 1;
+ myfnz = repfnz_col(chrep);
+
+ if (myfnz != emptyIdxLU)
+ { // Visited before
+ if (myfnz > chperm)
+ repfnz_col(chrep) = chperm;
+ }
+ else
+ { // Cont. dfs at snode-rep of kchild
+ xplore(krep) = xdfs;
+ oldrep = krep;
+ krep = chrep; // Go deeper down G(L)
+ parent(krep) = oldrep;
+ repfnz_col(krep) = chperm;
+ xdfs = glu.xlsub(krep);
+ maxdfs = xprune(krep);
+
+ } // end if myfnz != -1
+ } // end if chperm == -1
+
+ } // end if chmark !=jj
+ } // end while xdfs < maxdfs
+
+ // krow has no more unexplored nbrs :
+ // Place snode-rep krep in postorder DFS, if this
+ // segment is seen for the first time. (Note that
+ // "repfnz(krep)" may change later.)
+ // Baktrack dfs to its parent
+ if(traits.update_segrep(krep,jj))
+ //if (marker1(krep) < jcol )
+ {
+ segrep(nseg) = krep;
+ ++nseg;
+ //marker1(krep) = jj;
+ }
+
+ kpar = parent(krep); // Pop recursion, mimic recursion
+ if (kpar == emptyIdxLU)
+ break; // dfs done
+ krep = kpar;
+ xdfs = xplore(krep);
+ maxdfs = xprune(krep);
+
+ } while (kpar != emptyIdxLU); // Do until empty stack
+
+ } // end if (myfnz = -1)
+
+ } // end if (kperm == -1)
+}
+
+/**
+ * \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w)
+ *
+ * A supernode representative is the last column of a supernode.
+ * The nonzeros in U[*,j] are segments that end at supernodes representatives
+ *
+ * The routine returns a list of the supernodal representatives
+ * in topological order of the dfs that generates them. This list is
+ * a superset of the topological order of each individual column within
+ * the panel.
+ * The location of the first nonzero in each supernodal segment
+ * (supernodal entry location) is also returned. Each column has
+ * a separate list for this purpose.
+ *
+ * Two markers arrays are used for dfs :
+ * marker[i] == jj, if i was visited during dfs of current column jj;
+ * marker1[i] >= jcol, if i was visited by earlier columns in this panel;
+ *
+ * \param[in] m number of rows in the matrix
+ * \param[in] w Panel size
+ * \param[in] jcol Starting column of the panel
+ * \param[in] A Input matrix in column-major storage
+ * \param[in] perm_r Row permutation
+ * \param[out] nseg Number of U segments
+ * \param[out] dense Accumulate the column vectors of the panel
+ * \param[out] panel_lsub Subscripts of the row in the panel
+ * \param[out] segrep Segment representative i.e first nonzero row of each segment
+ * \param[out] repfnz First nonzero location in each row
+ * \param[out] xprune The pruned elimination tree
+ * \param[out] marker work vector
+ * \param parent The elimination tree
+ * \param xplore work vector
+ * \param glu The global data structure
+ *
+ */
+
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
+{
+ Index nextl_col; // Next available position in panel_lsub[*,jj]
+
+ // Initialize pointers
+ VectorBlock<IndexVector> marker1(marker, m, m);
+ nseg = 0;
+
+ panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
+
+ // For each column in the panel
+ for (StorageIndex jj = StorageIndex(jcol); jj < jcol + w; jj++)
+ {
+ nextl_col = (jj - jcol) * m;
+
+ VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row
+ VectorBlock<ScalarVector> dense_col(dense,nextl_col, m); // Accumulate a column vector here
+
+
+ // For each nnz in A[*, jj] do depth first search
+ for (typename MatrixType::InnerIterator it(A, jj); it; ++it)
+ {
+ Index krow = it.row();
+ dense_col(krow) = it.value();
+
+ StorageIndex kmark = marker(krow);
+ if (kmark == jj)
+ continue; // krow visited before, go to the next nonzero
+
+ dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,
+ xplore, glu, nextl_col, krow, traits);
+ }// end for nonzeros in column jj
+
+ } // end for column jj
+}
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // SPARSELU_PANEL_DFS_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h
new file mode 100644
index 000000000..a86dac93f
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h
@@ -0,0 +1,137 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of xpivotL.c file in SuperLU
+
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_PIVOTL_H
+#define SPARSELU_PIVOTL_H
+
+namespace Eigen {
+namespace internal {
+
+/**
+ * \brief Performs the numerical pivotin on the current column of L, and the CDIV operation.
+ *
+ * Pivot policy :
+ * (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ * pivot row = k;
+ * ELSE IF abs(A_jj) >= thresh THEN
+ * pivot row = j;
+ * ELSE
+ * pivot row = m;
+ *
+ * Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ *
+ * \param jcol The current column of L
+ * \param diagpivotthresh diagonal pivoting threshold
+ * \param[in,out] perm_r Row permutation (threshold pivoting)
+ * \param[in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc'
+ * \param[out] pivrow The pivot row
+ * \param glu Global LU data
+ * \return 0 if success, i > 0 if U(i,i) is exactly zero
+ *
+ */
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu)
+{
+
+ Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol
+ Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0
+ Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion
+ Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode
+ Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); // leading dimension
+ Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode
+ Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode
+ StorageIndex* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode
+
+ // Determine the largest abs numerical value for partial pivoting
+ Index diagind = iperm_c(jcol); // diagonal index
+ RealScalar pivmax(-1.0);
+ Index pivptr = nsupc;
+ Index diag = emptyIdxLU;
+ RealScalar rtemp;
+ Index isub, icol, itemp, k;
+ for (isub = nsupc; isub < nsupr; ++isub) {
+ using std::abs;
+ rtemp = abs(lu_col_ptr[isub]);
+ if (rtemp > pivmax) {
+ pivmax = rtemp;
+ pivptr = isub;
+ }
+ if (lsub_ptr[isub] == diagind) diag = isub;
+ }
+
+ // Test for singularity
+ if ( pivmax <= RealScalar(0.0) ) {
+ // if pivmax == -1, the column is structurally empty, otherwise it is only numerically zero
+ pivrow = pivmax < RealScalar(0.0) ? diagind : lsub_ptr[pivptr];
+ perm_r(pivrow) = StorageIndex(jcol);
+ return (jcol+1);
+ }
+
+ RealScalar thresh = diagpivotthresh * pivmax;
+
+ // Choose appropriate pivotal element
+
+ {
+ // Test if the diagonal element can be used as a pivot (given the threshold value)
+ if (diag >= 0 )
+ {
+ // Diagonal element exists
+ using std::abs;
+ rtemp = abs(lu_col_ptr[diag]);
+ if (rtemp != RealScalar(0.0) && rtemp >= thresh) pivptr = diag;
+ }
+ pivrow = lsub_ptr[pivptr];
+ }
+
+ // Record pivot row
+ perm_r(pivrow) = StorageIndex(jcol);
+ // Interchange row subscripts
+ if (pivptr != nsupc )
+ {
+ std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] );
+ // Interchange numerical values as well, for the two rows in the whole snode
+ // such that L is indexed the same way as A
+ for (icol = 0; icol <= nsupc; icol++)
+ {
+ itemp = pivptr + icol * lda;
+ std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * lda]);
+ }
+ }
+ // cdiv operations
+ Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc];
+ for (k = nsupc+1; k < nsupr; k++)
+ lu_col_ptr[k] *= temp;
+ return 0;
+}
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // SPARSELU_PIVOTL_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h
new file mode 100644
index 000000000..ad32fed5e
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h
@@ -0,0 +1,136 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU
+
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_PRUNEL_H
+#define SPARSELU_PRUNEL_H
+
+namespace Eigen {
+namespace internal {
+
+/**
+ * \brief Prunes the L-structure.
+ *
+ * It prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow"
+ *
+ *
+ * \param jcol The current column of L
+ * \param[in] perm_r Row permutation
+ * \param[out] pivrow The pivot row
+ * \param nseg Number of segments
+ * \param segrep
+ * \param repfnz
+ * \param[out] xprune
+ * \param glu Global LU data
+ *
+ */
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg,
+ const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
+{
+ // For each supernode-rep irep in U(*,j]
+ Index jsupno = glu.supno(jcol);
+ Index i,irep,irep1;
+ bool movnum, do_prune = false;
+ Index kmin = 0, kmax = 0, minloc, maxloc,krow;
+ for (i = 0; i < nseg; i++)
+ {
+ irep = segrep(i);
+ irep1 = irep + 1;
+ do_prune = false;
+
+ // Don't prune with a zero U-segment
+ if (repfnz(irep) == emptyIdxLU) continue;
+
+ // If a snode overlaps with the next panel, then the U-segment
+ // is fragmented into two parts -- irep and irep1. We should let
+ // pruning occur at the rep-column in irep1s snode.
+ if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune
+
+ // If it has not been pruned & it has a nonz in row L(pivrow,i)
+ if (glu.supno(irep) != jsupno )
+ {
+ if ( xprune (irep) >= glu.xlsub(irep1) )
+ {
+ kmin = glu.xlsub(irep);
+ kmax = glu.xlsub(irep1) - 1;
+ for (krow = kmin; krow <= kmax; krow++)
+ {
+ if (glu.lsub(krow) == pivrow)
+ {
+ do_prune = true;
+ break;
+ }
+ }
+ }
+
+ if (do_prune)
+ {
+ // do a quicksort-type partition
+ // movnum=true means that the num values have to be exchanged
+ movnum = false;
+ if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1
+ movnum = true;
+
+ while (kmin <= kmax)
+ {
+ if (perm_r(glu.lsub(kmax)) == emptyIdxLU)
+ kmax--;
+ else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU)
+ kmin++;
+ else
+ {
+ // kmin below pivrow (not yet pivoted), and kmax
+ // above pivrow: interchange the two suscripts
+ std::swap(glu.lsub(kmin), glu.lsub(kmax));
+
+ // If the supernode has only one column, then we
+ // only keep one set of subscripts. For any subscript
+ // intercnahge performed, similar interchange must be
+ // done on the numerical values.
+ if (movnum)
+ {
+ minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) );
+ maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) );
+ std::swap(glu.lusup(minloc), glu.lusup(maxloc));
+ }
+ kmin++;
+ kmax--;
+ }
+ } // end while
+
+ xprune(irep) = StorageIndex(kmin); //Pruning
+ } // end if do_prune
+ } // end pruning
+ } // End for each U-segment
+}
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // SPARSELU_PRUNEL_H
diff --git a/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h
new file mode 100644
index 000000000..c408d01b4
--- /dev/null
+++ b/runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h
@@ -0,0 +1,83 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* This file is a modified version of heap_relax_snode.c file in SuperLU
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+
+#ifndef SPARSELU_RELAX_SNODE_H
+#define SPARSELU_RELAX_SNODE_H
+
+namespace Eigen {
+
+namespace internal {
+
+/**
+ * \brief Identify the initial relaxed supernodes
+ *
+ * This routine is applied to a column elimination tree.
+ * It assumes that the matrix has been reordered according to the postorder of the etree
+ * \param n the number of columns
+ * \param et elimination tree
+ * \param relax_columns Maximum number of columns allowed in a relaxed snode
+ * \param descendants Number of descendants of each node in the etree
+ * \param relax_end last column in a supernode
+ */
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
+{
+
+ // compute the number of descendants of each node in the etree
+ Index parent;
+ relax_end.setConstant(emptyIdxLU);
+ descendants.setZero();
+ for (Index j = 0; j < n; j++)
+ {
+ parent = et(j);
+ if (parent != n) // not the dummy root
+ descendants(parent) += descendants(j) + 1;
+ }
+ // Identify the relaxed supernodes by postorder traversal of the etree
+ Index snode_start; // beginning of a snode
+ for (Index j = 0; j < n; )
+ {
+ parent = et(j);
+ snode_start = j;
+ while ( parent != n && descendants(parent) < relax_columns )
+ {
+ j = parent;
+ parent = et(j);
+ }
+ // Found a supernode in postordered etree, j is the last column
+ relax_end(snode_start) = StorageIndex(j); // Record last column
+ j++;
+ // Search for a new leaf
+ while (descendants(j) != 0 && j < n) j++;
+ } // End postorder traversal of the etree
+
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+#endif