diff options
author | julielangou <julie@cs.utk.edu> | 2016-11-19 14:40:33 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-11-19 14:40:33 -0800 |
commit | 01cdfedf1d726a003f7a4e7331f32a7e434f1707 (patch) | |
tree | 4a9ff4318f5a1d66a4dc2690d9dc5ccb242981a3 /SRC/ssytrd_sy2sb.f | |
parent | ead2c73f1a6dad1342bf32987c0b2f2eaf61f18a (diff) | |
parent | ae36f785d68d637ef0a60f9028c38eb2d369d9f1 (diff) | |
download | lapack-01cdfedf1d726a003f7a4e7331f32a7e434f1707.tar.gz lapack-01cdfedf1d726a003f7a4e7331f32a7e434f1707.tar.bz2 lapack-01cdfedf1d726a003f7a4e7331f32a7e434f1707.zip |
Merge pull request #83 from haidarazzam/master
adding the 2stage symmetric eigenvalue routines drivers checking
Diffstat (limited to 'SRC/ssytrd_sy2sb.f')
-rw-r--r-- | SRC/ssytrd_sy2sb.f | 517 |
1 files changed, 517 insertions, 0 deletions
diff --git a/SRC/ssytrd_sy2sb.f b/SRC/ssytrd_sy2sb.f new file mode 100644 index 00000000..3dbbaf1f --- /dev/null +++ b/SRC/ssytrd_sy2sb.f @@ -0,0 +1,517 @@ +*> \brief \b SSYTRD_SY2SB +* +* @generated from zhetrd_he2hb.f, fortran z -> s, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRD_SY2SB + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRD_SY2SB reduces a real symmetric matrix A to real symmetric +*> band-diagonal form AB by a orthogonal similarity transformation: +*> Q**T * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK. +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**T . . . H(2)**T H(1)**T, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +* A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + REAL A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RONE + REAL ZERO, ONE, HALF + PARAMETER ( RONE = 1.0E+0, + $ ZERO = 0.0E+0, + $ ONE = 1.0E+0, + $ HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYR2K, SSYMM, SGEMM, + $ SLARFT, SGELQF, SGEQRF, SLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'SSYTRD_SY2SB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SY2SB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL SCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL SCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL SLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL SGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL SCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL SLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL SLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL SGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL SSYMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL SGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL SGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL SSYR2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL SCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL SGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL SCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL SLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL SLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL SGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL SSYMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL SGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL SGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL SSYR2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL SCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL SCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYTRD_SY2SB +* + END |