summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--SRC/cgehrd.f46
-rw-r--r--SRC/cunmlq.f48
-rw-r--r--SRC/cunmql.f45
-rw-r--r--SRC/cunmqr.f31
-rw-r--r--SRC/cunmrq.f43
-rw-r--r--SRC/cunmrz.f50
-rw-r--r--SRC/dgehrd.f45
-rw-r--r--SRC/dormlq.f33
-rw-r--r--SRC/dormql.f43
-rw-r--r--SRC/dormqr.f33
-rw-r--r--SRC/dormrq.f43
-rw-r--r--SRC/dormrz.f41
-rw-r--r--SRC/sgehrd.f46
-rw-r--r--SRC/sormlq.f33
-rw-r--r--SRC/sormql.f42
-rw-r--r--SRC/sormqr.f33
-rw-r--r--SRC/sormrq.f43
-rw-r--r--SRC/sormrz.f41
-rw-r--r--SRC/zgehrd.f46
-rw-r--r--SRC/zunmlq.f33
-rw-r--r--SRC/zunmql.f43
-rw-r--r--SRC/zunmqr.f33
-rw-r--r--SRC/zunmrq.f45
-rw-r--r--SRC/zunmrz.f43
24 files changed, 424 insertions, 558 deletions
diff --git a/SRC/cgehrd.f b/SRC/cgehrd.f
index b590b3c1..4a6e20dd 100644
--- a/SRC/cgehrd.f
+++ b/SRC/cgehrd.f
@@ -97,8 +97,7 @@
*> \verbatim
*> LWORK is INTEGER
*> The length of the array WORK. LWORK >= max(1,N).
-*> For optimum performance LWORK >= N*NB, where NB is the
-*> optimal blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -183,21 +182,19 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
COMPLEX ZERO, ONE
PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ),
$ ONE = ( 1.0E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
- INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
+ INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB,
$ NBMIN, NH, NX
COMPLEX EI
* ..
-* .. Local Arrays ..
- COMPLEX T( LDT, NBMAX )
-* ..
* .. External Subroutines ..
EXTERNAL CAXPY, CGEHD2, CGEMM, CLAHR2, CLARFB, CTRMM,
$ XERBLA
@@ -214,9 +211,6 @@
* Test the input parameters
*
INFO = 0
- NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) )
- LWKOPT = N*NB
- WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
@@ -229,6 +223,16 @@
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -8
END IF
+*
+ IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
+ NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) )
+ LWKOPT = N*NB + TSIZE
+ WORK( 1 ) = LWKOPT
+ END IF
+*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'CGEHRD', -INFO )
RETURN
@@ -257,7 +261,6 @@
*
NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) )
NBMIN = 2
- IWS = 1
IF( NB.GT.1 .AND. NB.LT.NH ) THEN
*
* Determine when to cross over from blocked to unblocked code
@@ -268,8 +271,7 @@
*
* Determine if workspace is large enough for blocked code
*
- IWS = N*NB
- IF( LWORK.LT.IWS ) THEN
+ IF( LWORK.LT.N*NB+TSIZE ) THEN
*
* Not enough workspace to use optimal NB: determine the
* minimum value of NB, and reduce NB or force use of
@@ -277,8 +279,8 @@
*
NBMIN = MAX( 2, ILAENV( 2, 'CGEHRD', ' ', N, ILO, IHI,
$ -1 ) )
- IF( LWORK.GE.N*NBMIN ) THEN
- NB = LWORK / N
+ IF( LWORK.GE.(N*NBMIN+TSIZE) ) THEN
+ NB = (LWORK-TSIZE) / N
ELSE
NB = 1
END IF
@@ -297,6 +299,7 @@
*
* Use blocked code
*
+ IWT = 1 + N*NB
DO 40 I = ILO, IHI - 1 - NX, NB
IB = MIN( NB, IHI-I )
*
@@ -304,8 +307,8 @@
* matrices V and T of the block reflector H = I - V*T*V**H
* which performs the reduction, and also the matrix Y = A*V*T
*
- CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
- $ WORK, LDWORK )
+ CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ),
+ $ WORK( IWT ), LDT, WORK, LDWORK )
*
* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
* right, computing A := A - Y * V**H. V(i+ib,ib-1) must be set
@@ -335,15 +338,16 @@
*
CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward',
$ 'Columnwise',
- $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
- $ A( I+1, I+IB ), LDA, WORK, LDWORK )
+ $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA,
+ $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA,
+ $ WORK, LDWORK )
40 CONTINUE
END IF
*
* Use unblocked code to reduce the rest of the matrix
*
CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
- WORK( 1 ) = IWS
+ WORK( 1 ) = LWKOPT
*
RETURN
*
diff --git a/SRC/cunmlq.f b/SRC/cunmlq.f
index 7770a6d4..7a5e3718 100644
--- a/SRC/cunmlq.f
+++ b/SRC/cunmlq.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,18 +185,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -247,13 +243,16 @@
END IF
*
IF( INFO.EQ.0 ) THEN
-*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
-*
- NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, K,
- $ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+*
+* Compute the workspace requirements
+*
+ IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+ LWKOPT = 1
+ ELSE
+ NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N,
+ $ K, -1 ) )
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
+ END IF
WORK( 1 ) = LWKOPT
END IF
*
@@ -267,21 +266,19 @@
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
- WORK( 1 ) = 1
RETURN
END IF
*
+* Determine the block size
+*
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'CUNMLQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -294,6 +291,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -326,7 +324,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL CLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(i:m,1:n)
@@ -344,8 +342,8 @@
* Apply H or H**H
*
CALL CLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
- $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
- $ LDWORK )
+ $ A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/cunmql.f b/SRC/cunmql.f
index b23709d0..7543712f 100644
--- a/SRC/cunmql.f
+++ b/SRC/cunmql.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,17 +185,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -241,25 +237,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'CUNMQL', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -275,17 +268,16 @@
RETURN
END IF
*
+* Determine the block size
+*
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.(NW*NB+TSIZE) ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'CUNMQL', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -298,6 +290,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -322,7 +315,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL CLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
- $ A( 1, I ), LDA, TAU( I ), T, LDT )
+ $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(1:m-k+i+ib-1,1:n)
@@ -338,8 +331,8 @@
* Apply H or H**H
*
CALL CLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
- $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/cunmqr.f b/SRC/cunmqr.f
index e4bdabae..903b8d44 100644
--- a/SRC/cunmqr.f
+++ b/SRC/cunmqr.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,17 +185,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -247,12 +243,11 @@
*
IF( INFO.EQ.0 ) THEN
*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
+* Compute the workspace requirements
*
NB = MIN( NBMAX, ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
WORK( 1 ) = LWKOPT
END IF
*
@@ -273,9 +268,8 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'CUNMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
@@ -293,6 +287,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -319,7 +314,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL CLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(i:m,1:n)
@@ -337,8 +332,8 @@
* Apply H or H**H
*
CALL CLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
- $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
- $ WORK, LDWORK )
+ $ IB, A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/cunmrq.f b/SRC/cunmrq.f
index 88a4f0b2..342de81b 100644
--- a/SRC/cunmrq.f
+++ b/SRC/cunmrq.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,18 +185,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -242,25 +238,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -279,14 +272,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -299,6 +289,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -329,7 +320,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL CLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB,
- $ A( I, 1 ), LDA, TAU( I ), T, LDT )
+ $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(1:m-k+i+ib-1,1:n)
@@ -345,8 +336,8 @@
* Apply H or H**H
*
CALL CLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
- $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/cunmrz.f b/SRC/cunmrz.f
index 1a742693..5d93764e 100644
--- a/SRC/cunmrz.f
+++ b/SRC/cunmrz.f
@@ -145,9 +145,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -205,18 +203,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC,
$ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -263,25 +259,22 @@
INFO = -8
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -11
+ ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+ INFO = -13
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
- INFO = -13
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -297,22 +290,18 @@
RETURN
END IF
*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
+* Determine the block size.
*
NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K,
- $ -1 ) )
+ $ -1 ) )
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K,
- $ -1 ) )
+ $ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -325,6 +314,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -359,7 +349,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL CLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA,
- $ TAU( I ), T, LDT )
+ $ TAU( I ), WORK( IWT ), LDT )
*
IF( LEFT ) THEN
*
@@ -378,8 +368,8 @@
* Apply H or H**H
*
CALL CLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
- $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ),
- $ LDC, WORK, LDWORK )
+ $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
*
END IF
diff --git a/SRC/dgehrd.f b/SRC/dgehrd.f
index 0dda2e2f..ccaf9055 100644
--- a/SRC/dgehrd.f
+++ b/SRC/dgehrd.f
@@ -97,8 +97,7 @@
*> \verbatim
*> LWORK is INTEGER
*> The length of the array WORK. LWORK >= max(1,N).
-*> For optimum performance LWORK >= N*NB, where NB is the
-*> optimal blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -183,21 +182,19 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0,
$ ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
- INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
+ INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB,
$ NBMIN, NH, NX
DOUBLE PRECISION EI
* ..
-* .. Local Arrays ..
- DOUBLE PRECISION T( LDT, NBMAX )
-* ..
* .. External Subroutines ..
EXTERNAL DAXPY, DGEHD2, DGEMM, DLAHR2, DLARFB, DTRMM,
$ XERBLA
@@ -214,9 +211,6 @@
* Test the input parameters
*
INFO = 0
- NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) )
- LWKOPT = N*NB
- WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
@@ -229,6 +223,16 @@
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -8
END IF
+*
+ IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
+ NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) )
+ LWKOPT = N*NB + TSIZE
+ WORK( 1 ) = LWKOPT
+ END IF
+*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGEHRD', -INFO )
RETURN
@@ -268,8 +272,7 @@
*
* Determine if workspace is large enough for blocked code
*
- IWS = N*NB
- IF( LWORK.LT.IWS ) THEN
+ IF( LWORK.LT.N*NB+TSIZE ) THEN
*
* Not enough workspace to use optimal NB: determine the
* minimum value of NB, and reduce NB or force use of
@@ -277,8 +280,8 @@
*
NBMIN = MAX( 2, ILAENV( 2, 'DGEHRD', ' ', N, ILO, IHI,
$ -1 ) )
- IF( LWORK.GE.N*NBMIN ) THEN
- NB = LWORK / N
+ IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN
+ NB = (LWORK-TSIZE) / N
ELSE
NB = 1
END IF
@@ -297,6 +300,7 @@
*
* Use blocked code
*
+ IWT = 1 + N*NB
DO 40 I = ILO, IHI - 1 - NX, NB
IB = MIN( NB, IHI-I )
*
@@ -304,8 +308,8 @@
* matrices V and T of the block reflector H = I - V*T*V**T
* which performs the reduction, and also the matrix Y = A*V*T
*
- CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
- $ WORK, LDWORK )
+ CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ),
+ $ WORK( IWT ), LDT, WORK, LDWORK )
*
* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
* right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set
@@ -335,15 +339,16 @@
*
CALL DLARFB( 'Left', 'Transpose', 'Forward',
$ 'Columnwise',
- $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
- $ A( I+1, I+IB ), LDA, WORK, LDWORK )
+ $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA,
+ $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA,
+ $ WORK, LDWORK )
40 CONTINUE
END IF
*
* Use unblocked code to reduce the rest of the matrix
*
CALL DGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
- WORK( 1 ) = IWS
+ WORK( 1 ) = LWKOPT
*
RETURN
*
diff --git a/SRC/dormlq.f b/SRC/dormlq.f
index ebbd4d26..8c395372 100644
--- a/SRC/dormlq.f
+++ b/SRC/dormlq.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,18 +183,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- DOUBLE PRECISION T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -246,12 +242,11 @@
*
IF( INFO.EQ.0 ) THEN
*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
+* Compute the workspace requirements
*
NB = MIN( NBMAX, ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N, K,
$ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
WORK( 1 ) = LWKOPT
END IF
*
@@ -272,14 +267,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DORMLQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -292,6 +284,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -324,7 +317,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(i:m,1:n)
@@ -342,8 +335,8 @@
* Apply H or H**T
*
CALL DLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
- $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
- $ LDWORK )
+ $ A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/dormql.f b/SRC/dormql.f
index 96c6f195..512f234e 100644
--- a/SRC/dormql.f
+++ b/SRC/dormql.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,17 +183,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- DOUBLE PRECISION T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -239,25 +235,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -276,14 +269,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DORMQL', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -296,6 +286,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -320,7 +311,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
- $ A( 1, I ), LDA, TAU( I ), T, LDT )
+ $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(1:m-k+i+ib-1,1:n)
@@ -336,8 +327,8 @@
* Apply H or H**T
*
CALL DLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
- $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/dormqr.f b/SRC/dormqr.f
index c0767ecf..e3f423b9 100644
--- a/SRC/dormqr.f
+++ b/SRC/dormqr.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,17 +183,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- DOUBLE PRECISION T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -245,12 +241,11 @@
*
IF( INFO.EQ.0 ) THEN
*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
+* Compute the workspace requirements
*
NB = MIN( NBMAX, ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
WORK( 1 ) = LWKOPT
END IF
*
@@ -271,14 +266,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DORMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -291,6 +283,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -317,7 +310,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(i:m,1:n)
@@ -335,8 +328,8 @@
* Apply H or H**T
*
CALL DLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
- $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
- $ WORK, LDWORK )
+ $ IB, A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/dormrq.f b/SRC/dormrq.f
index ca0db3b8..4cf2b449 100644
--- a/SRC/dormrq.f
+++ b/SRC/dormrq.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,18 +183,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- DOUBLE PRECISION T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -240,25 +236,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -277,14 +270,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -297,6 +287,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -327,7 +318,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL DLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB,
- $ A( I, 1 ), LDA, TAU( I ), T, LDT )
+ $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(1:m-k+i+ib-1,1:n)
@@ -343,8 +334,8 @@
* Apply H or H**T
*
CALL DLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
- $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/dormrz.f b/SRC/dormrz.f
index e971f869..f055763c 100644
--- a/SRC/dormrz.f
+++ b/SRC/dormrz.f
@@ -145,9 +145,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -205,18 +203,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC,
$ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- DOUBLE PRECISION T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -263,25 +259,22 @@
INFO = -8
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -11
+ ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+ INFO = -13
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
- INFO = -13
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -301,9 +294,8 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
@@ -321,6 +313,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -355,7 +348,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL DLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA,
- $ TAU( I ), T, LDT )
+ $ TAU( I ), WORK( IWT ), LDT )
*
IF( LEFT ) THEN
*
@@ -374,8 +367,8 @@
* Apply H or H**T
*
CALL DLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
- $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ),
- $ LDC, WORK, LDWORK )
+ $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
*
END IF
diff --git a/SRC/sgehrd.f b/SRC/sgehrd.f
index b88ea03b..3ceea0e2 100644
--- a/SRC/sgehrd.f
+++ b/SRC/sgehrd.f
@@ -97,8 +97,7 @@
*> \verbatim
*> LWORK is INTEGER
*> The length of the array WORK. LWORK >= max(1,N).
-*> For optimum performance LWORK >= N*NB, where NB is the
-*> optimal blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -183,21 +182,19 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0E+0,
$ ONE = 1.0E+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
- INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
+ INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB,
$ NBMIN, NH, NX
REAL EI
* ..
-* .. Local Arrays ..
- REAL T( LDT, NBMAX )
-* ..
* .. External Subroutines ..
EXTERNAL SAXPY, SGEHD2, SGEMM, SLAHR2, SLARFB, STRMM,
$ XERBLA
@@ -214,9 +211,6 @@
* Test the input parameters
*
INFO = 0
- NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
- LWKOPT = N*NB
- WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
@@ -229,6 +223,16 @@
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -8
END IF
+*
+ IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
+ NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
+ LWKOPT = N*NB + TSIZE
+ WORK( 1 ) = LWKOPT
+ END IF
+*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SGEHRD', -INFO )
RETURN
@@ -257,7 +261,6 @@
*
NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
NBMIN = 2
- IWS = 1
IF( NB.GT.1 .AND. NB.LT.NH ) THEN
*
* Determine when to cross over from blocked to unblocked code
@@ -268,8 +271,7 @@
*
* Determine if workspace is large enough for blocked code
*
- IWS = N*NB
- IF( LWORK.LT.IWS ) THEN
+ IF( LWORK.LT.N*NB+TSIZE ) THEN
*
* Not enough workspace to use optimal NB: determine the
* minimum value of NB, and reduce NB or force use of
@@ -277,8 +279,8 @@
*
NBMIN = MAX( 2, ILAENV( 2, 'SGEHRD', ' ', N, ILO, IHI,
$ -1 ) )
- IF( LWORK.GE.N*NBMIN ) THEN
- NB = LWORK / N
+ IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN
+ NB = (LWORK-TSIZE) / N
ELSE
NB = 1
END IF
@@ -297,6 +299,7 @@
*
* Use blocked code
*
+ IWT = 1 + N*NB
DO 40 I = ILO, IHI - 1 - NX, NB
IB = MIN( NB, IHI-I )
*
@@ -304,8 +307,8 @@
* matrices V and T of the block reflector H = I - V*T*V**T
* which performs the reduction, and also the matrix Y = A*V*T
*
- CALL SLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
- $ WORK, LDWORK )
+ CALL SLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ),
+ $ WORK( IWT ), LDT, WORK, LDWORK )
*
* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
* right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set
@@ -335,15 +338,16 @@
*
CALL SLARFB( 'Left', 'Transpose', 'Forward',
$ 'Columnwise',
- $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
- $ A( I+1, I+IB ), LDA, WORK, LDWORK )
+ $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA,
+ $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA,
+ $ WORK, LDWORK )
40 CONTINUE
END IF
*
* Use unblocked code to reduce the rest of the matrix
*
CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
- WORK( 1 ) = IWS
+ WORK( 1 ) = LWKOPT
*
RETURN
*
diff --git a/SRC/sormlq.f b/SRC/sormlq.f
index 9652664c..63c68214 100644
--- a/SRC/sormlq.f
+++ b/SRC/sormlq.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,18 +185,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- REAL T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -248,12 +244,11 @@
*
IF( INFO.EQ.0 ) THEN
*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
+* Compute the workspace requirements
*
NB = MIN( NBMAX, ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N, K,
$ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
WORK( 1 ) = LWKOPT
END IF
*
@@ -274,14 +269,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'SORMLQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -294,6 +286,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -326,7 +319,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL SLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(i:m,1:n)
@@ -344,8 +337,8 @@
* Apply H or H**T
*
CALL SLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
- $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
- $ LDWORK )
+ $ A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/sormql.f b/SRC/sormql.f
index ed419042..ccb47c3d 100644
--- a/SRC/sormql.f
+++ b/SRC/sormql.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,17 +185,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- REAL T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -241,26 +237,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
-*
NB = MIN( NBMAX, ILAENV( 1, 'SORMQL', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -279,9 +271,8 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'SORMQL', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
@@ -299,6 +290,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -323,7 +315,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL SLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
- $ A( 1, I ), LDA, TAU( I ), T, LDT )
+ $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(1:m-k+i+ib-1,1:n)
@@ -339,8 +331,8 @@
* Apply H or H**T
*
CALL SLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
- $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/sormqr.f b/SRC/sormqr.f
index 6e89d125..d890f72d 100644
--- a/SRC/sormqr.f
+++ b/SRC/sormqr.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,17 +185,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- REAL T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -247,12 +243,11 @@
*
IF( INFO.EQ.0 ) THEN
*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
+* Compute the workspace requirements
*
NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
WORK( 1 ) = LWKOPT
END IF
*
@@ -273,14 +268,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'SORMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -293,6 +285,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -319,7 +312,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL SLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(i:m,1:n)
@@ -337,8 +330,8 @@
* Apply H or H**T
*
CALL SLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
- $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
- $ WORK, LDWORK )
+ $ IB, A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/sormrq.f b/SRC/sormrq.f
index 88eb5ecb..156f17d8 100644
--- a/SRC/sormrq.f
+++ b/SRC/sormrq.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,18 +185,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- REAL T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -242,25 +238,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -279,14 +272,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -299,6 +289,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -329,7 +320,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL SLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB,
- $ A( I, 1 ), LDA, TAU( I ), T, LDT )
+ $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(1:m-k+i+ib-1,1:n)
@@ -345,8 +336,8 @@
* Apply H or H**T
*
CALL SLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
- $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/sormrz.f b/SRC/sormrz.f
index 1f55f322..33d5184e 100644
--- a/SRC/sormrz.f
+++ b/SRC/sormrz.f
@@ -145,9 +145,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -205,18 +203,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC,
$ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- REAL T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -263,25 +259,22 @@
INFO = -8
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -11
+ ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+ INFO = -13
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
- INFO = -13
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -300,9 +293,8 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
@@ -320,6 +312,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -354,7 +347,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL SLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA,
- $ TAU( I ), T, LDT )
+ $ TAU( I ), WORK( IWT ), LDT )
*
IF( LEFT ) THEN
*
@@ -373,8 +366,8 @@
* Apply H or H**T
*
CALL SLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
- $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ),
- $ LDC, WORK, LDWORK )
+ $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
*
END IF
diff --git a/SRC/zgehrd.f b/SRC/zgehrd.f
index 2674af64..c5db663d 100644
--- a/SRC/zgehrd.f
+++ b/SRC/zgehrd.f
@@ -97,8 +97,7 @@
*> \verbatim
*> LWORK is INTEGER
*> The length of the array WORK. LWORK >= max(1,N).
-*> For optimum performance LWORK >= N*NB, where NB is the
-*> optimal blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -183,21 +182,19 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
COMPLEX*16 ZERO, ONE
PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ),
$ ONE = ( 1.0D+0, 0.0D+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
- INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
+ INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB,
$ NBMIN, NH, NX
COMPLEX*16 EI
* ..
-* .. Local Arrays ..
- COMPLEX*16 T( LDT, NBMAX )
-* ..
* .. External Subroutines ..
EXTERNAL ZAXPY, ZGEHD2, ZGEMM, ZLAHR2, ZLARFB, ZTRMM,
$ XERBLA
@@ -214,9 +211,6 @@
* Test the input parameters
*
INFO = 0
- NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) )
- LWKOPT = N*NB
- WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( N.LT.0 ) THEN
INFO = -1
@@ -229,6 +223,16 @@
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -8
END IF
+*
+ IF( INFO.NE.0 ) THEN
+*
+* Compute the workspace requirements
+*
+ NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) )
+ LWKOPT = N*NB + TSIZE
+ WORK( 1 ) = LWKOPT
+ ENDIF
+*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'ZGEHRD', -INFO )
RETURN
@@ -257,7 +261,6 @@
*
NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) )
NBMIN = 2
- IWS = 1
IF( NB.GT.1 .AND. NB.LT.NH ) THEN
*
* Determine when to cross over from blocked to unblocked code
@@ -268,8 +271,7 @@
*
* Determine if workspace is large enough for blocked code
*
- IWS = N*NB
- IF( LWORK.LT.IWS ) THEN
+ IF( LWORK.LT.N*NB+TSIZE ) THEN
*
* Not enough workspace to use optimal NB: determine the
* minimum value of NB, and reduce NB or force use of
@@ -277,8 +279,8 @@
*
NBMIN = MAX( 2, ILAENV( 2, 'ZGEHRD', ' ', N, ILO, IHI,
$ -1 ) )
- IF( LWORK.GE.N*NBMIN ) THEN
- NB = LWORK / N
+ IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN
+ NB = (LWORK-TSIZE) / N
ELSE
NB = 1
END IF
@@ -297,6 +299,7 @@
*
* Use blocked code
*
+ IWT = 1 + N*NB
DO 40 I = ILO, IHI - 1 - NX, NB
IB = MIN( NB, IHI-I )
*
@@ -304,8 +307,8 @@
* matrices V and T of the block reflector H = I - V*T*V**H
* which performs the reduction, and also the matrix Y = A*V*T
*
- CALL ZLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
- $ WORK, LDWORK )
+ CALL ZLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ),
+ $ WORK( IWT ), LDT, WORK, LDWORK )
*
* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
* right, computing A := A - Y * V**H. V(i+ib,ib-1) must be set
@@ -335,15 +338,16 @@
*
CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward',
$ 'Columnwise',
- $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
- $ A( I+1, I+IB ), LDA, WORK, LDWORK )
+ $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA,
+ $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA,
+ $ WORK, LDWORK )
40 CONTINUE
END IF
*
* Use unblocked code to reduce the rest of the matrix
*
CALL ZGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
- WORK( 1 ) = IWS
+ WORK( 1 ) = LWKOPT
*
RETURN
*
diff --git a/SRC/zunmlq.f b/SRC/zunmlq.f
index 583d2b78..076cf8e4 100644
--- a/SRC/zunmlq.f
+++ b/SRC/zunmlq.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,18 +183,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX*16 T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -246,12 +242,11 @@
*
IF( INFO.EQ.0 ) THEN
*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
+* Compute the workspace requirements
*
NB = MIN( NBMAX, ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M, N, K,
$ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
WORK( 1 ) = LWKOPT
END IF
*
@@ -272,14 +267,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'ZUNMLQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -292,6 +284,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -324,7 +317,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL ZLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(i:m,1:n)
@@ -342,8 +335,8 @@
* Apply H or H**H
*
CALL ZLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
- $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
- $ LDWORK )
+ $ A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/zunmql.f b/SRC/zunmql.f
index 6f6cdd32..0a969694 100644
--- a/SRC/zunmql.f
+++ b/SRC/zunmql.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should genreally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,17 +183,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX*16 T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -239,25 +235,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -276,14 +269,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQL', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -296,6 +286,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -320,7 +311,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL ZLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
- $ A( 1, I ), LDA, TAU( I ), T, LDT )
+ $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(1:m-k+i+ib-1,1:n)
@@ -336,8 +327,8 @@
* Apply H or H**H
*
CALL ZLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
- $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/zunmqr.f b/SRC/zunmqr.f
index e20ccf83..8a755aaf 100644
--- a/SRC/zunmqr.f
+++ b/SRC/zunmqr.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,17 +183,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX*16 T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -245,12 +241,11 @@
*
IF( INFO.EQ.0 ) THEN
*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
+* Compute the workspace requirements
*
NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
WORK( 1 ) = LWKOPT
END IF
*
@@ -271,14 +266,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -291,6 +283,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -317,7 +310,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL ZLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(i:m,1:n)
@@ -335,8 +328,8 @@
* Apply H or H**H
*
CALL ZLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
- $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
- $ WORK, LDWORK )
+ $ IB, A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/zunmrq.f b/SRC/zunmrq.f
index d8b39314..cddec191 100644
--- a/SRC/zunmrq.f
+++ b/SRC/zunmrq.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,18 +183,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX*16 T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -240,25 +236,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -277,14 +270,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K,
- $ -1 ) )
+ $ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -297,6 +287,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -327,7 +318,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL ZLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB,
- $ A( I, 1 ), LDA, TAU( I ), T, LDT )
+ $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(1:m-k+i+ib-1,1:n)
@@ -343,8 +334,8 @@
* Apply H or H**H
*
CALL ZLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
- $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT
diff --git a/SRC/zunmrz.f b/SRC/zunmrz.f
index 48e5d275..ab26ad08 100644
--- a/SRC/zunmrz.f
+++ b/SRC/zunmrz.f
@@ -145,9 +145,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -205,18 +203,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC,
$ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX*16 T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -263,25 +259,22 @@
INFO = -8
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -11
+ ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+ INFO = -13
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
- INFO = -13
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -305,14 +298,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -325,6 +315,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. NOTRAN ) ) THEN
I1 = 1
@@ -359,7 +350,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL ZLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA,
- $ TAU( I ), T, LDT )
+ $ TAU( I ), WORK( IWT ), LDT )
*
IF( LEFT ) THEN
*
@@ -378,8 +369,8 @@
* Apply H or H**H
*
CALL ZLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
- $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ),
- $ LDC, WORK, LDWORK )
+ $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
*
END IF