diff options
author | philippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971> | 2015-05-14 18:50:57 +0000 |
---|---|---|
committer | philippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971> | 2015-05-14 18:50:57 +0000 |
commit | 34420f19e306e8fe09664d3bebce237d5f2ab92f (patch) | |
tree | 91f3e1d80220648d207ed4fd769c55f819fe3e4f | |
parent | 6eff56f7e1ac1d65b85726164a3fa5d4f5a7f1d5 (diff) | |
download | lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.gz lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.bz2 lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.zip |
This partially fixes bug 061 reported by Victor Liu.
Some compilers allocate local arrays on the heap when their size is
above a particular threshold.
This leads to wrong results when multiple threads call the same routine.
The bug fix consists in using a larger workspace, as proposed by Victor Liu
(Tue Nov 13, 2012).
Some routines still have large local arrays and cannot be fixed that way
because they have no workspace parameter: xGBTRF, xBPTRF, and xHSEQR.
-rw-r--r-- | SRC/cgehrd.f | 46 | ||||
-rw-r--r-- | SRC/cunmlq.f | 48 | ||||
-rw-r--r-- | SRC/cunmql.f | 45 | ||||
-rw-r--r-- | SRC/cunmqr.f | 31 | ||||
-rw-r--r-- | SRC/cunmrq.f | 43 | ||||
-rw-r--r-- | SRC/cunmrz.f | 50 | ||||
-rw-r--r-- | SRC/dgehrd.f | 45 | ||||
-rw-r--r-- | SRC/dormlq.f | 33 | ||||
-rw-r--r-- | SRC/dormql.f | 43 | ||||
-rw-r--r-- | SRC/dormqr.f | 33 | ||||
-rw-r--r-- | SRC/dormrq.f | 43 | ||||
-rw-r--r-- | SRC/dormrz.f | 41 | ||||
-rw-r--r-- | SRC/sgehrd.f | 46 | ||||
-rw-r--r-- | SRC/sormlq.f | 33 | ||||
-rw-r--r-- | SRC/sormql.f | 42 | ||||
-rw-r--r-- | SRC/sormqr.f | 33 | ||||
-rw-r--r-- | SRC/sormrq.f | 43 | ||||
-rw-r--r-- | SRC/sormrz.f | 41 | ||||
-rw-r--r-- | SRC/zgehrd.f | 46 | ||||
-rw-r--r-- | SRC/zunmlq.f | 33 | ||||
-rw-r--r-- | SRC/zunmql.f | 43 | ||||
-rw-r--r-- | SRC/zunmqr.f | 33 | ||||
-rw-r--r-- | SRC/zunmrq.f | 45 | ||||
-rw-r--r-- | SRC/zunmrz.f | 43 |
24 files changed, 424 insertions, 558 deletions
diff --git a/SRC/cgehrd.f b/SRC/cgehrd.f index b590b3c1..4a6e20dd 100644 --- a/SRC/cgehrd.f +++ b/SRC/cgehrd.f @@ -97,8 +97,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= max(1,N). -*> For optimum performance LWORK >= N*NB, where NB is the -*> optimal blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -183,21 +182,19 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY - INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB, + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, $ NBMIN, NH, NX COMPLEX EI * .. -* .. Local Arrays .. - COMPLEX T( LDT, NBMAX ) -* .. * .. External Subroutines .. EXTERNAL CAXPY, CGEHD2, CGEMM, CLAHR2, CLARFB, CTRMM, $ XERBLA @@ -214,9 +211,6 @@ * Test the input parameters * INFO = 0 - NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -229,6 +223,16 @@ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -8 END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* IF( INFO.NE.0 ) THEN CALL XERBLA( 'CGEHRD', -INFO ) RETURN @@ -257,7 +261,6 @@ * NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) NBMIN = 2 - IWS = 1 IF( NB.GT.1 .AND. NB.LT.NH ) THEN * * Determine when to cross over from blocked to unblocked code @@ -268,8 +271,7 @@ * * Determine if workspace is large enough for blocked code * - IWS = N*NB - IF( LWORK.LT.IWS ) THEN + IF( LWORK.LT.N*NB+TSIZE ) THEN * * Not enough workspace to use optimal NB: determine the * minimum value of NB, and reduce NB or force use of @@ -277,8 +279,8 @@ * NBMIN = MAX( 2, ILAENV( 2, 'CGEHRD', ' ', N, ILO, IHI, $ -1 ) ) - IF( LWORK.GE.N*NBMIN ) THEN - NB = LWORK / N + IF( LWORK.GE.(N*NBMIN+TSIZE) ) THEN + NB = (LWORK-TSIZE) / N ELSE NB = 1 END IF @@ -297,6 +299,7 @@ * * Use blocked code * + IWT = 1 + N*NB DO 40 I = ILO, IHI - 1 - NX, NB IB = MIN( NB, IHI-I ) * @@ -304,8 +307,8 @@ * matrices V and T of the block reflector H = I - V*T*V**H * which performs the reduction, and also the matrix Y = A*V*T * - CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, - $ WORK, LDWORK ) + CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) * * Apply the block reflector H to A(1:ihi,i+ib:ihi) from the * right, computing A := A - Y * V**H. V(i+ib,ib-1) must be set @@ -335,15 +338,16 @@ * CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', - $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, - $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) 40 CONTINUE END IF * * Use unblocked code to reduce the rest of the matrix * CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) - WORK( 1 ) = IWS + WORK( 1 ) = LWKOPT * RETURN * diff --git a/SRC/cunmlq.f b/SRC/cunmlq.f index 7770a6d4..7a5e3718 100644 --- a/SRC/cunmlq.f +++ b/SRC/cunmlq.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,18 +185,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -247,13 +243,16 @@ END IF * IF( INFO.EQ.0 ) THEN -* -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. -* - NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, K, - $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + END IF WORK( 1 ) = LWKOPT END IF * @@ -267,21 +266,19 @@ * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN - WORK( 1 ) = 1 RETURN END IF * +* Determine the block size +* NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'CUNMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -294,6 +291,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -326,7 +324,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL CLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(i:m,1:n) @@ -344,8 +342,8 @@ * Apply H or H**H * CALL CLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, - $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, - $ LDWORK ) + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/cunmql.f b/SRC/cunmql.f index b23709d0..7543712f 100644 --- a/SRC/cunmql.f +++ b/SRC/cunmql.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,17 +185,15 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN - INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, $ MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -241,25 +237,22 @@ INFO = -7 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'CUNMQL', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN - INFO = -12 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -275,17 +268,16 @@ RETURN END IF * +* Determine the block size +* NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.(NW*NB+TSIZE) ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'CUNMQL', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -298,6 +290,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -322,7 +315,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL CLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, - $ A( 1, I ), LDA, TAU( I ), T, LDT ) + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(1:m-k+i+ib-1,1:n) @@ -338,8 +331,8 @@ * Apply H or H**H * CALL CLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, - $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, - $ LDWORK ) + $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/cunmqr.f b/SRC/cunmqr.f index e4bdabae..903b8d44 100644 --- a/SRC/cunmqr.f +++ b/SRC/cunmqr.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,17 +185,15 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -247,12 +243,11 @@ * IF( INFO.EQ.0 ) THEN * -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. +* Compute the workspace requirements * NB = MIN( NBMAX, ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N, K, $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB + LWKOPT = MAX( 1, NW )*NB + TSIZE WORK( 1 ) = LWKOPT END IF * @@ -273,9 +268,8 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'CUNMQR', SIDE // TRANS, M, N, K, $ -1 ) ) END IF @@ -293,6 +287,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -319,7 +314,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL CLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(i:m,1:n) @@ -337,8 +332,8 @@ * Apply H or H**H * CALL CLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, - $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, - $ WORK, LDWORK ) + $ IB, A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/cunmrq.f b/SRC/cunmrq.f index 88a4f0b2..342de81b 100644 --- a/SRC/cunmrq.f +++ b/SRC/cunmrq.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,18 +185,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, $ MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -242,25 +238,22 @@ INFO = -7 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN - INFO = -12 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -279,14 +272,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -299,6 +289,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -329,7 +320,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL CLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, - $ A( I, 1 ), LDA, TAU( I ), T, LDT ) + $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(1:m-k+i+ib-1,1:n) @@ -345,8 +336,8 @@ * Apply H or H**H * CALL CLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, - $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK, - $ LDWORK ) + $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/cunmrz.f b/SRC/cunmrz.f index 1a742693..5d93764e 100644 --- a/SRC/cunmrz.f +++ b/SRC/cunmrz.f @@ -145,9 +145,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -205,18 +203,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC, $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -263,25 +259,22 @@ INFO = -8 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -297,22 +290,18 @@ RETURN END IF * -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. +* Determine the block size. * NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K, - $ -1 ) ) + $ -1 ) ) NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K, - $ -1 ) ) + $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -325,6 +314,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -359,7 +349,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL CLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, - $ TAU( I ), T, LDT ) + $ TAU( I ), WORK( IWT ), LDT ) * IF( LEFT ) THEN * @@ -378,8 +368,8 @@ * Apply H or H**H * CALL CLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, - $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), - $ LDC, WORK, LDWORK ) + $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE * END IF diff --git a/SRC/dgehrd.f b/SRC/dgehrd.f index 0dda2e2f..ccaf9055 100644 --- a/SRC/dgehrd.f +++ b/SRC/dgehrd.f @@ -97,8 +97,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= max(1,N). -*> For optimum performance LWORK >= N*NB, where NB is the -*> optimal blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -183,21 +182,19 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D+0, $ ONE = 1.0D+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY - INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB, + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, $ NBMIN, NH, NX DOUBLE PRECISION EI * .. -* .. Local Arrays .. - DOUBLE PRECISION T( LDT, NBMAX ) -* .. * .. External Subroutines .. EXTERNAL DAXPY, DGEHD2, DGEMM, DLAHR2, DLARFB, DTRMM, $ XERBLA @@ -214,9 +211,6 @@ * Test the input parameters * INFO = 0 - NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -229,6 +223,16 @@ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -8 END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* IF( INFO.NE.0 ) THEN CALL XERBLA( 'DGEHRD', -INFO ) RETURN @@ -268,8 +272,7 @@ * * Determine if workspace is large enough for blocked code * - IWS = N*NB - IF( LWORK.LT.IWS ) THEN + IF( LWORK.LT.N*NB+TSIZE ) THEN * * Not enough workspace to use optimal NB: determine the * minimum value of NB, and reduce NB or force use of @@ -277,8 +280,8 @@ * NBMIN = MAX( 2, ILAENV( 2, 'DGEHRD', ' ', N, ILO, IHI, $ -1 ) ) - IF( LWORK.GE.N*NBMIN ) THEN - NB = LWORK / N + IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN + NB = (LWORK-TSIZE) / N ELSE NB = 1 END IF @@ -297,6 +300,7 @@ * * Use blocked code * + IWT = 1 + N*NB DO 40 I = ILO, IHI - 1 - NX, NB IB = MIN( NB, IHI-I ) * @@ -304,8 +308,8 @@ * matrices V and T of the block reflector H = I - V*T*V**T * which performs the reduction, and also the matrix Y = A*V*T * - CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, - $ WORK, LDWORK ) + CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) * * Apply the block reflector H to A(1:ihi,i+ib:ihi) from the * right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set @@ -335,15 +339,16 @@ * CALL DLARFB( 'Left', 'Transpose', 'Forward', $ 'Columnwise', - $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, - $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) 40 CONTINUE END IF * * Use unblocked code to reduce the rest of the matrix * CALL DGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) - WORK( 1 ) = IWS + WORK( 1 ) = LWKOPT * RETURN * diff --git a/SRC/dormlq.f b/SRC/dormlq.f index ebbd4d26..8c395372 100644 --- a/SRC/dormlq.f +++ b/SRC/dormlq.f @@ -136,9 +136,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -185,18 +183,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - DOUBLE PRECISION T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -246,12 +242,11 @@ * IF( INFO.EQ.0 ) THEN * -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. +* Compute the workspace requirements * NB = MIN( NBMAX, ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB + LWKOPT = MAX( 1, NW )*NB + TSIZE WORK( 1 ) = LWKOPT END IF * @@ -272,14 +267,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'DORMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -292,6 +284,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -324,7 +317,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(i:m,1:n) @@ -342,8 +335,8 @@ * Apply H or H**T * CALL DLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, - $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, - $ LDWORK ) + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/dormql.f b/SRC/dormql.f index 96c6f195..512f234e 100644 --- a/SRC/dormql.f +++ b/SRC/dormql.f @@ -136,9 +136,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -185,17 +183,15 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN - INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, $ MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - DOUBLE PRECISION T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -239,25 +235,22 @@ INFO = -7 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN - INFO = -12 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -276,14 +269,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'DORMQL', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -296,6 +286,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -320,7 +311,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, - $ A( 1, I ), LDA, TAU( I ), T, LDT ) + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(1:m-k+i+ib-1,1:n) @@ -336,8 +327,8 @@ * Apply H or H**T * CALL DLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, - $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, - $ LDWORK ) + $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/dormqr.f b/SRC/dormqr.f index c0767ecf..e3f423b9 100644 --- a/SRC/dormqr.f +++ b/SRC/dormqr.f @@ -136,9 +136,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -185,17 +183,15 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - DOUBLE PRECISION T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -245,12 +241,11 @@ * IF( INFO.EQ.0 ) THEN * -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. +* Compute the workspace requirements * NB = MIN( NBMAX, ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N, K, $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB + LWKOPT = MAX( 1, NW )*NB + TSIZE WORK( 1 ) = LWKOPT END IF * @@ -271,14 +266,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'DORMQR', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -291,6 +283,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -317,7 +310,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(i:m,1:n) @@ -335,8 +328,8 @@ * Apply H or H**T * CALL DLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, - $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, - $ WORK, LDWORK ) + $ IB, A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/dormrq.f b/SRC/dormrq.f index ca0db3b8..4cf2b449 100644 --- a/SRC/dormrq.f +++ b/SRC/dormrq.f @@ -136,9 +136,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -185,18 +183,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, $ MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - DOUBLE PRECISION T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -240,25 +236,22 @@ INFO = -7 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN - INFO = -12 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -277,14 +270,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -297,6 +287,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -327,7 +318,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL DLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, - $ A( I, 1 ), LDA, TAU( I ), T, LDT ) + $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(1:m-k+i+ib-1,1:n) @@ -343,8 +334,8 @@ * Apply H or H**T * CALL DLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, - $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK, - $ LDWORK ) + $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/dormrz.f b/SRC/dormrz.f index e971f869..f055763c 100644 --- a/SRC/dormrz.f +++ b/SRC/dormrz.f @@ -145,9 +145,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -205,18 +203,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC, $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - DOUBLE PRECISION T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -263,25 +259,22 @@ INFO = -8 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -301,9 +294,8 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF @@ -321,6 +313,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -355,7 +348,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL DLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, - $ TAU( I ), T, LDT ) + $ TAU( I ), WORK( IWT ), LDT ) * IF( LEFT ) THEN * @@ -374,8 +367,8 @@ * Apply H or H**T * CALL DLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, - $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), - $ LDC, WORK, LDWORK ) + $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE * END IF diff --git a/SRC/sgehrd.f b/SRC/sgehrd.f index b88ea03b..3ceea0e2 100644 --- a/SRC/sgehrd.f +++ b/SRC/sgehrd.f @@ -97,8 +97,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= max(1,N). -*> For optimum performance LWORK >= N*NB, where NB is the -*> optimal blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -183,21 +182,19 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, $ ONE = 1.0E+0 ) * .. * .. Local Scalars .. LOGICAL LQUERY - INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB, + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, $ NBMIN, NH, NX REAL EI * .. -* .. Local Arrays .. - REAL T( LDT, NBMAX ) -* .. * .. External Subroutines .. EXTERNAL SAXPY, SGEHD2, SGEMM, SLAHR2, SLARFB, STRMM, $ XERBLA @@ -214,9 +211,6 @@ * Test the input parameters * INFO = 0 - NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -229,6 +223,16 @@ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -8 END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* IF( INFO.NE.0 ) THEN CALL XERBLA( 'SGEHRD', -INFO ) RETURN @@ -257,7 +261,6 @@ * NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) NBMIN = 2 - IWS = 1 IF( NB.GT.1 .AND. NB.LT.NH ) THEN * * Determine when to cross over from blocked to unblocked code @@ -268,8 +271,7 @@ * * Determine if workspace is large enough for blocked code * - IWS = N*NB - IF( LWORK.LT.IWS ) THEN + IF( LWORK.LT.N*NB+TSIZE ) THEN * * Not enough workspace to use optimal NB: determine the * minimum value of NB, and reduce NB or force use of @@ -277,8 +279,8 @@ * NBMIN = MAX( 2, ILAENV( 2, 'SGEHRD', ' ', N, ILO, IHI, $ -1 ) ) - IF( LWORK.GE.N*NBMIN ) THEN - NB = LWORK / N + IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN + NB = (LWORK-TSIZE) / N ELSE NB = 1 END IF @@ -297,6 +299,7 @@ * * Use blocked code * + IWT = 1 + N*NB DO 40 I = ILO, IHI - 1 - NX, NB IB = MIN( NB, IHI-I ) * @@ -304,8 +307,8 @@ * matrices V and T of the block reflector H = I - V*T*V**T * which performs the reduction, and also the matrix Y = A*V*T * - CALL SLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, - $ WORK, LDWORK ) + CALL SLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) * * Apply the block reflector H to A(1:ihi,i+ib:ihi) from the * right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set @@ -335,15 +338,16 @@ * CALL SLARFB( 'Left', 'Transpose', 'Forward', $ 'Columnwise', - $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, - $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) 40 CONTINUE END IF * * Use unblocked code to reduce the rest of the matrix * CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) - WORK( 1 ) = IWS + WORK( 1 ) = LWKOPT * RETURN * diff --git a/SRC/sormlq.f b/SRC/sormlq.f index 9652664c..63c68214 100644 --- a/SRC/sormlq.f +++ b/SRC/sormlq.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,18 +185,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - REAL T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -248,12 +244,11 @@ * IF( INFO.EQ.0 ) THEN * -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. +* Compute the workspace requirements * NB = MIN( NBMAX, ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB + LWKOPT = MAX( 1, NW )*NB + TSIZE WORK( 1 ) = LWKOPT END IF * @@ -274,14 +269,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'SORMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -294,6 +286,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -326,7 +319,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL SLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(i:m,1:n) @@ -344,8 +337,8 @@ * Apply H or H**T * CALL SLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, - $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, - $ LDWORK ) + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/sormql.f b/SRC/sormql.f index ed419042..ccb47c3d 100644 --- a/SRC/sormql.f +++ b/SRC/sormql.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,17 +185,15 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN - INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, $ MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - REAL T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -241,26 +237,22 @@ INFO = -7 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* -* NB = MIN( NBMAX, ILAENV( 1, 'SORMQL', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN - INFO = -12 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -279,9 +271,8 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'SORMQL', SIDE // TRANS, M, N, K, $ -1 ) ) END IF @@ -299,6 +290,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -323,7 +315,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL SLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, - $ A( 1, I ), LDA, TAU( I ), T, LDT ) + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(1:m-k+i+ib-1,1:n) @@ -339,8 +331,8 @@ * Apply H or H**T * CALL SLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, - $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, - $ LDWORK ) + $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/sormqr.f b/SRC/sormqr.f index 6e89d125..d890f72d 100644 --- a/SRC/sormqr.f +++ b/SRC/sormqr.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,17 +185,15 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - REAL T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -247,12 +243,11 @@ * IF( INFO.EQ.0 ) THEN * -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. +* Compute the workspace requirements * NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K, $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB + LWKOPT = MAX( 1, NW )*NB + TSIZE WORK( 1 ) = LWKOPT END IF * @@ -273,14 +268,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'SORMQR', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -293,6 +285,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -319,7 +312,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL SLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(i:m,1:n) @@ -337,8 +330,8 @@ * Apply H or H**T * CALL SLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, - $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, - $ WORK, LDWORK ) + $ IB, A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/sormrq.f b/SRC/sormrq.f index 88eb5ecb..156f17d8 100644 --- a/SRC/sormrq.f +++ b/SRC/sormrq.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,18 +185,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, $ MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - REAL T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -242,25 +238,22 @@ INFO = -7 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN - INFO = -12 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -279,14 +272,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -299,6 +289,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -329,7 +320,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL SLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, - $ A( I, 1 ), LDA, TAU( I ), T, LDT ) + $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(1:m-k+i+ib-1,1:n) @@ -345,8 +336,8 @@ * Apply H or H**T * CALL SLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, - $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK, - $ LDWORK ) + $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/sormrz.f b/SRC/sormrz.f index 1f55f322..33d5184e 100644 --- a/SRC/sormrz.f +++ b/SRC/sormrz.f @@ -145,9 +145,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -205,18 +203,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC, $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - REAL T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -263,25 +259,22 @@ INFO = -8 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -300,9 +293,8 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF @@ -320,6 +312,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -354,7 +347,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL SLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, - $ TAU( I ), T, LDT ) + $ TAU( I ), WORK( IWT ), LDT ) * IF( LEFT ) THEN * @@ -373,8 +366,8 @@ * Apply H or H**T * CALL SLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, - $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), - $ LDC, WORK, LDWORK ) + $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE * END IF diff --git a/SRC/zgehrd.f b/SRC/zgehrd.f index 2674af64..c5db663d 100644 --- a/SRC/zgehrd.f +++ b/SRC/zgehrd.f @@ -97,8 +97,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= max(1,N). -*> For optimum performance LWORK >= N*NB, where NB is the -*> optimal blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -183,21 +182,19 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), $ ONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY - INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB, + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, $ NBMIN, NH, NX COMPLEX*16 EI * .. -* .. Local Arrays .. - COMPLEX*16 T( LDT, NBMAX ) -* .. * .. External Subroutines .. EXTERNAL ZAXPY, ZGEHD2, ZGEMM, ZLAHR2, ZLARFB, ZTRMM, $ XERBLA @@ -214,9 +211,6 @@ * Test the input parameters * INFO = 0 - NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -229,6 +223,16 @@ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -8 END IF +* + IF( INFO.NE.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + ENDIF +* IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZGEHRD', -INFO ) RETURN @@ -257,7 +261,6 @@ * NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) ) NBMIN = 2 - IWS = 1 IF( NB.GT.1 .AND. NB.LT.NH ) THEN * * Determine when to cross over from blocked to unblocked code @@ -268,8 +271,7 @@ * * Determine if workspace is large enough for blocked code * - IWS = N*NB - IF( LWORK.LT.IWS ) THEN + IF( LWORK.LT.N*NB+TSIZE ) THEN * * Not enough workspace to use optimal NB: determine the * minimum value of NB, and reduce NB or force use of @@ -277,8 +279,8 @@ * NBMIN = MAX( 2, ILAENV( 2, 'ZGEHRD', ' ', N, ILO, IHI, $ -1 ) ) - IF( LWORK.GE.N*NBMIN ) THEN - NB = LWORK / N + IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN + NB = (LWORK-TSIZE) / N ELSE NB = 1 END IF @@ -297,6 +299,7 @@ * * Use blocked code * + IWT = 1 + N*NB DO 40 I = ILO, IHI - 1 - NX, NB IB = MIN( NB, IHI-I ) * @@ -304,8 +307,8 @@ * matrices V and T of the block reflector H = I - V*T*V**H * which performs the reduction, and also the matrix Y = A*V*T * - CALL ZLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, - $ WORK, LDWORK ) + CALL ZLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) * * Apply the block reflector H to A(1:ihi,i+ib:ihi) from the * right, computing A := A - Y * V**H. V(i+ib,ib-1) must be set @@ -335,15 +338,16 @@ * CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', - $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, - $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) 40 CONTINUE END IF * * Use unblocked code to reduce the rest of the matrix * CALL ZGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) - WORK( 1 ) = IWS + WORK( 1 ) = LWKOPT * RETURN * diff --git a/SRC/zunmlq.f b/SRC/zunmlq.f index 583d2b78..076cf8e4 100644 --- a/SRC/zunmlq.f +++ b/SRC/zunmlq.f @@ -136,9 +136,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -185,18 +183,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX*16 T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -246,12 +242,11 @@ * IF( INFO.EQ.0 ) THEN * -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. +* Compute the workspace requirements * NB = MIN( NBMAX, ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB + LWKOPT = MAX( 1, NW )*NB + TSIZE WORK( 1 ) = LWKOPT END IF * @@ -272,14 +267,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'ZUNMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -292,6 +284,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -324,7 +317,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL ZLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(i:m,1:n) @@ -342,8 +335,8 @@ * Apply H or H**H * CALL ZLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, - $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, - $ LDWORK ) + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/zunmql.f b/SRC/zunmql.f index 6f6cdd32..0a969694 100644 --- a/SRC/zunmql.f +++ b/SRC/zunmql.f @@ -136,9 +136,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should genreally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -185,17 +183,15 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN - INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, $ MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX*16 T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -239,25 +235,22 @@ INFO = -7 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN - INFO = -12 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -276,14 +269,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQL', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -296,6 +286,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -320,7 +311,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL ZLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, - $ A( 1, I ), LDA, TAU( I ), T, LDT ) + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(1:m-k+i+ib-1,1:n) @@ -336,8 +327,8 @@ * Apply H or H**H * CALL ZLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, - $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, - $ LDWORK ) + $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/zunmqr.f b/SRC/zunmqr.f index e20ccf83..8a755aaf 100644 --- a/SRC/zunmqr.f +++ b/SRC/zunmqr.f @@ -136,9 +136,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -185,17 +183,15 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX*16 T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -245,12 +241,11 @@ * IF( INFO.EQ.0 ) THEN * -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. +* Compute the workspace requirements * NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N, K, $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB + LWKOPT = MAX( 1, NW )*NB + TSIZE WORK( 1 ) = LWKOPT END IF * @@ -271,14 +266,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQR', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -291,6 +283,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -317,7 +310,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL ZLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(i:m,1:n) @@ -335,8 +328,8 @@ * Apply H or H**H * CALL ZLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, - $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, - $ WORK, LDWORK ) + $ IB, A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/zunmrq.f b/SRC/zunmrq.f index d8b39314..cddec191 100644 --- a/SRC/zunmrq.f +++ b/SRC/zunmrq.f @@ -136,9 +136,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -185,18 +183,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, $ MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX*16 T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -240,25 +236,22 @@ INFO = -7 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN - INFO = -12 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -277,14 +270,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K, - $ -1 ) ) + $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -297,6 +287,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -327,7 +318,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL ZLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, - $ A( I, 1 ), LDA, TAU( I ), T, LDT ) + $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(1:m-k+i+ib-1,1:n) @@ -343,8 +334,8 @@ * Apply H or H**H * CALL ZLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, - $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK, - $ LDWORK ) + $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT diff --git a/SRC/zunmrz.f b/SRC/zunmrz.f index 48e5d275..ab26ad08 100644 --- a/SRC/zunmrz.f +++ b/SRC/zunmrz.f @@ -145,9 +145,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE = 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -205,18 +203,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC, $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX*16 T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -263,25 +259,22 @@ INFO = -8 ELSE IF( LDC.LT.MAX( 1, M ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* IF( M.EQ.0 .OR. N.EQ.0 ) THEN LWKOPT = 1 ELSE -* -* Determine the block size. NB may be at most NBMAX, where -* NBMAX is used to define the local array T. -* NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, $ K, -1 ) ) - LWKOPT = NW*NB + LWKOPT = NW*NB + TSIZE END IF WORK( 1 ) = LWKOPT -* - IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -305,14 +298,11 @@ NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -325,6 +315,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN I1 = 1 @@ -359,7 +350,7 @@ * H = H(i+ib-1) . . . H(i+1) H(i) * CALL ZLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, - $ TAU( I ), T, LDT ) + $ TAU( I ), WORK( IWT ), LDT ) * IF( LEFT ) THEN * @@ -378,8 +369,8 @@ * Apply H or H**H * CALL ZLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, - $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), - $ LDC, WORK, LDWORK ) + $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE * END IF |