summaryrefslogtreecommitdiff
path: root/SRC/chetrf_aa.f
diff options
context:
space:
mode:
authorJulie <julie@cs.utk.edu>2016-11-10 23:17:11 -0600
committerJulie <julie@cs.utk.edu>2016-11-10 23:17:11 -0600
commit5bb749aeff27bb0c8742ecc0aff4cae43c53298e (patch)
tree6e908da76a4c8e251ec4f5abb2e684b774e4d81f /SRC/chetrf_aa.f
parent1264ec40a7aa8634d192006a104c1e34f3f70ed9 (diff)
downloadlapack-5bb749aeff27bb0c8742ecc0aff4cae43c53298e.tar.gz
lapack-5bb749aeff27bb0c8742ecc0aff4cae43c53298e.tar.bz2
lapack-5bb749aeff27bb0c8742ecc0aff4cae43c53298e.zip
Changing Assen routines suffix from _Aassen to _aa
Diffstat (limited to 'SRC/chetrf_aa.f')
-rw-r--r--SRC/chetrf_aa.f483
1 files changed, 483 insertions, 0 deletions
diff --git a/SRC/chetrf_aa.f b/SRC/chetrf_aa.f
new file mode 100644
index 00000000..883e9f00
--- /dev/null
+++ b/SRC/chetrf_aa.f
@@ -0,0 +1,483 @@
+*> \brief \b CHETRF_AA
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download CHETRF_AA + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrf_aa.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrf_aa.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrf_aa.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+* .. Scalar Arguments ..
+* CHARACTER UPLO
+* INTEGER N, LDA, LWORK, INFO
+* ..
+* .. Array Arguments ..
+* INTEGER IPIV( * )
+* COMPLEX A( LDA, * ), WORK( * )
+* ..
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> CHETRF_AA computes the factorization of a complex hermitian matrix A
+*> using the Aasen's algorithm. The form of the factorization is
+*>
+*> A = U*T*U**T or A = L*T*L**T
+*>
+*> where U (or L) is a product of permutation and unit upper (lower)
+*> triangular matrices, and T is a hermitian tridiagonal matrix.
+*>
+*> This is the blocked version of the algorithm, calling Level 3 BLAS.
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] UPLO
+*> \verbatim
+*> UPLO is CHARACTER*1
+*> = 'U': Upper triangle of A is stored;
+*> = 'L': Lower triangle of A is stored.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> The order of the matrix A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*> A is COMPLEX array, dimension (LDA,N)
+*> On entry, the hermitian matrix A. If UPLO = 'U', the leading
+*> N-by-N upper triangular part of A contains the upper
+*> triangular part of the matrix A, and the strictly lower
+*> triangular part of A is not referenced. If UPLO = 'L', the
+*> leading N-by-N lower triangular part of A contains the lower
+*> triangular part of the matrix A, and the strictly upper
+*> triangular part of A is not referenced.
+*>
+*> On exit, the tridiagonal matrix is stored in the diagonals
+*> and the subdiagonals of A just below (or above) the diagonals,
+*> and L is stored below (or above) the subdiaonals, when UPLO
+*> is 'L' (or 'U').
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*> LDA is INTEGER
+*> The leading dimension of the array A. LDA >= max(1,N).
+*> \endverbatim
+*>
+*> \param[out] IPIV
+*> \verbatim
+*> IPIV is INTEGER array, dimension (N)
+*> On exit, it contains the details of the interchanges, i.e.,
+*> the row and column k of A were interchanged with the
+*> row and column IPIV(k).
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*> WORK is COMPLEX array, dimension (MAX(1,LWORK))
+*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*> LWORK is INTEGER
+*> The length of WORK. LWORK >= 2*N. For optimum performance
+*> LWORK >= N*(1+NB), where NB is the optimal blocksize.
+*>
+*> If LWORK = -1, then a workspace query is assumed; the routine
+*> only calculates the optimal size of the WORK array, returns
+*> this value as the first entry of the WORK array, and no error
+*> message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*> INFO is INTEGER
+*> = 0: successful exit
+*> < 0: if INFO = -i, the i-th argument had an illegal value
+*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization
+*> has been completed, but the block diagonal matrix D is
+*> exactly singular, and division by zero will occur if it
+*> is used to solve a system of equations.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2016
+*
+*> \ingroup complexHEcomputational
+*
+* =====================================================================
+ SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
+*
+* -- LAPACK computational routine (version 3.7.0) --
+* -- LAPACK is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+* November 2016
+*
+ IMPLICIT NONE
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER N, LDA, LWORK, INFO
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ COMPLEX A( LDA, * ), WORK( * )
+* ..
+*
+* =====================================================================
+* .. Parameters ..
+ COMPLEX ZERO, ONE
+ PARAMETER ( ZERO = (0.0E+0, 0.0E+0), ONE = (1.0E+0, 0.0E+0) )
+*
+* .. Local Scalars ..
+ LOGICAL LQUERY, UPPER
+ INTEGER J, LWKOPT, IINFO
+ INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
+ COMPLEX ALPHA
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC REAL, CONJG, MAX
+* ..
+* .. Executable Statements ..
+*
+* Determine the block size
+*
+ NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 )
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ LQUERY = ( LWORK.EQ.-1 )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ ELSE IF( LWORK.LT.( 2*N ) .AND. .NOT.LQUERY ) THEN
+ INFO = -7
+ END IF
+*
+ IF( INFO.EQ.0 ) THEN
+ LWKOPT = (NB+1)*N
+ WORK( 1 ) = LWKOPT
+ END IF
+*
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'CHETRF_AA', -INFO )
+ RETURN
+ ELSE IF( LQUERY ) THEN
+ RETURN
+ END IF
+*
+* Quick return
+*
+ IF ( N.EQ.0 ) THEN
+ RETURN
+ ENDIF
+ IPIV( 1 ) = 1
+ IF ( N.EQ.1 ) THEN
+ A( 1, 1 ) = REAL( A( 1, 1 ) )
+ IF ( A( 1, 1 ).EQ.ZERO ) THEN
+ INFO = 1
+ END IF
+ RETURN
+ END IF
+*
+* Adjubst block size based on the workspace size
+*
+ IF( LWORK.LT.((1+NB)*N) ) THEN
+ NB = ( LWORK-N ) / N
+ END IF
+*
+ IF( UPPER ) THEN
+*
+* .....................................................
+* Factorize A as L*D*L**T using the upper triangle of A
+* .....................................................
+*
+* copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
+*
+ CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 )
+*
+* J is the main loop index, increasing from 1 to N in steps of
+* JB, where JB is the number of columns factorized by CLAHEF;
+* JB is either NB, or N-J+1 for the last block
+*
+ J = 0
+ 10 CONTINUE
+ IF( J.GE.N )
+ $ GO TO 20
+*
+* each step of the main loop
+* J is the last column of the previous panel
+* J1 is the first column of the current panel
+* K1 identifies if the previous column of the panel has been
+* explicitly stored, e.g., K1=1 for the first panel, and
+* K1=0 for the rest
+*
+ J1 = J + 1
+ JB = MIN( N-J1+1, NB )
+ K1 = MAX(1, J)-J
+*
+* Panel factorization
+*
+ CALL CLAHEF_AA( UPLO, 2-K1, N-J, JB,
+ $ A( MAX(1, J), J+1 ), LDA,
+ $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ),
+ $ IINFO )
+ IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
+ INFO = IINFO+J
+ ENDIF
+*
+* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
+*
+ DO J2 = J+2, MIN(N, J+JB+1)
+ IPIV( J2 ) = IPIV( J2 ) + J
+ IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
+ CALL CSWAP( J1-K1-2, A( 1, J2 ), 1,
+ $ A( 1, IPIV(J2) ), 1 )
+ END IF
+ END DO
+ J = J + JB
+*
+* Trailing submatrix update, where
+* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
+* WORK stores the current block of the auxiriarly matrix H
+*
+ IF( J.LT.N ) THEN
+*
+* if the first panel and JB=1 (NB=1), then nothing to do
+*
+ IF( J1.GT.1 .OR. JB.GT.1 ) THEN
+*
+* Merge rank-1 update with BLAS-3 update
+*
+ ALPHA = CONJG( A( J, J+1 ) )
+ A( J, J+1 ) = ONE
+ CALL CCOPY( N-J, A( J-1, J+1 ), LDA,
+ $ WORK( (J+1-J1+1)+JB*N ), 1 )
+ CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
+*
+* K1 identifies if the previous column of the panel has been
+* explicitly stored, e.g., K1=0 and K2=1 for the first panel,
+* and K1=1 and K2=0 for the rest
+*
+ IF( J1.GT.1 ) THEN
+*
+* Not first panel
+*
+ K2 = 1
+ ELSE
+*
+* First panel
+*
+ K2 = 0
+*
+* First update skips the first column
+*
+ JB = JB - 1
+ END IF
+*
+ DO J2 = J+1, N, NB
+ NJ = MIN( NB, N-J2+1 )
+*
+* Update (J2, J2) diagonal block with CGEMV
+*
+ J3 = J2
+ DO MJ = NJ-1, 1, -1
+ CALL CGEMM( 'Conjugate transpose', 'Transpose',
+ $ 1, MJ, JB+1,
+ $ -ONE, A( J1-K2, J3 ), LDA,
+ $ WORK( (J3-J1+1)+K1*N ), N,
+ $ ONE, A( J3, J3 ), LDA )
+ J3 = J3 + 1
+ END DO
+*
+* Update off-diagonal block of J2-th block row with CGEMM
+*
+ CALL CGEMM( 'Conjugate transpose', 'Transpose',
+ $ NJ, N-J3+1, JB+1,
+ $ -ONE, A( J1-K2, J2 ), LDA,
+ $ WORK( (J3-J1+1)+K1*N ), N,
+ $ ONE, A( J2, J3 ), LDA )
+ END DO
+*
+* Recover T( J, J+1 )
+*
+ A( J, J+1 ) = CONJG( ALPHA )
+ END IF
+*
+* WORK(J+1, 1) stores H(J+1, 1)
+*
+ CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 )
+ END IF
+ GO TO 10
+ ELSE
+*
+* .....................................................
+* Factorize A as L*D*L**T using the lower triangle of A
+* .....................................................
+*
+* copy first column A(1:N, 1) into H(1:N, 1)
+* (stored in WORK(1:N))
+*
+ CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 )
+*
+* J is the main loop index, increasing from 1 to N in steps of
+* JB, where JB is the number of columns factorized by CLAHEF;
+* JB is either NB, or N-J+1 for the last block
+*
+ J = 0
+ 11 CONTINUE
+ IF( J.GE.N )
+ $ GO TO 20
+*
+* each step of the main loop
+* J is the last column of the previous panel
+* J1 is the first column of the current panel
+* K1 identifies if the previous column of the panel has been
+* explicitly stored, e.g., K1=1 for the first panel, and
+* K1=0 for the rest
+*
+ J1 = J+1
+ JB = MIN( N-J1+1, NB )
+ K1 = MAX(1, J)-J
+*
+* Panel factorization
+*
+ CALL CLAHEF_AA( UPLO, 2-K1, N-J, JB,
+ $ A( J+1, MAX(1, J) ), LDA,
+ $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO)
+ IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
+ INFO = IINFO+J
+ ENDIF
+*
+* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
+*
+ DO J2 = J+2, MIN(N, J+JB+1)
+ IPIV( J2 ) = IPIV( J2 ) + J
+ IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
+ CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA,
+ $ A( IPIV(J2), 1 ), LDA )
+ END IF
+ END DO
+ J = J + JB
+*
+* Trailing submatrix update, where
+* A(J2+1, J1-1) stores L(J2+1, J1) and
+* WORK(J2+1, 1) stores H(J2+1, 1)
+*
+ IF( J.LT.N ) THEN
+*
+* if the first panel and JB=1 (NB=1), then nothing to do
+*
+ IF( J1.GT.1 .OR. JB.GT.1 ) THEN
+*
+* Merge rank-1 update with BLAS-3 update
+*
+ ALPHA = CONJG( A( J+1, J ) )
+ A( J+1, J ) = ONE
+ CALL CCOPY( N-J, A( J+1, J-1 ), 1,
+ $ WORK( (J+1-J1+1)+JB*N ), 1 )
+ CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
+*
+* K1 identifies if the previous column of the panel has been
+* explicitly stored, e.g., K1=0 and K2=1 for the first panel,
+* and K1=1 and K2=0 for the rest
+*
+ IF( J1.GT.1 ) THEN
+*
+* Not first panel
+*
+ K2 = 1
+ ELSE
+*
+* First panel
+*
+ K2 = 0
+*
+* First update skips the first column
+*
+ JB = JB - 1
+ END IF
+*
+ DO J2 = J+1, N, NB
+ NJ = MIN( NB, N-J2+1 )
+*
+* Update (J2, J2) diagonal block with CGEMV
+*
+ J3 = J2
+ DO MJ = NJ-1, 1, -1
+ CALL CGEMM( 'No transpose', 'Conjugate transpose',
+ $ MJ, 1, JB+1,
+ $ -ONE, WORK( (J3-J1+1)+K1*N ), N,
+ $ A( J3, J1-K2 ), LDA,
+ $ ONE, A( J3, J3 ), LDA )
+ J3 = J3 + 1
+ END DO
+*
+* Update off-diagonal block of J2-th block column with CGEMM
+*
+ CALL CGEMM( 'No transpose', 'Conjugate transpose',
+ $ N-J3+1, NJ, JB+1,
+ $ -ONE, WORK( (J3-J1+1)+K1*N ), N,
+ $ A( J2, J1-K2 ), LDA,
+ $ ONE, A( J3, J2 ), LDA )
+ END DO
+*
+* Recover T( J+1, J )
+*
+ A( J+1, J ) = CONJG( ALPHA )
+ END IF
+*
+* WORK(J+1, 1) stores H(J+1, 1)
+*
+ CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 )
+ END IF
+ GO TO 11
+ END IF
+*
+ 20 CONTINUE
+ RETURN
+*
+* End of CHETRF_AA
+*
+ END