diff options
author | philippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971> | 2015-05-14 18:50:57 +0000 |
---|---|---|
committer | philippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971> | 2015-05-14 18:50:57 +0000 |
commit | 34420f19e306e8fe09664d3bebce237d5f2ab92f (patch) | |
tree | 91f3e1d80220648d207ed4fd769c55f819fe3e4f /SRC/cunmlq.f | |
parent | 6eff56f7e1ac1d65b85726164a3fa5d4f5a7f1d5 (diff) | |
download | lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.gz lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.bz2 lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.zip |
This partially fixes bug 061 reported by Victor Liu.
Some compilers allocate local arrays on the heap when their size is
above a particular threshold.
This leads to wrong results when multiple threads call the same routine.
The bug fix consists in using a larger workspace, as proposed by Victor Liu
(Tue Nov 13, 2012).
Some routines still have large local arrays and cannot be fixed that way
because they have no workspace parameter: xGBTRF, xBPTRF, and xHSEQR.
Diffstat (limited to 'SRC/cunmlq.f')
-rw-r--r-- | SRC/cunmlq.f | 48 |
1 files changed, 23 insertions, 25 deletions
diff --git a/SRC/cunmlq.f b/SRC/cunmlq.f index 7770a6d4..7a5e3718 100644 --- a/SRC/cunmlq.f +++ b/SRC/cunmlq.f @@ -137,9 +137,7 @@ *> The dimension of the array WORK. *> If SIDE = 'L', LWORK >= max(1,N); *> if SIDE = 'R', LWORK >= max(1,M). -*> For optimum performance LWORK >= N*NB if SIDE 'L', and -*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal -*> blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -187,18 +185,16 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) * .. * .. Local Scalars .. LOGICAL LEFT, LQUERY, NOTRAN CHARACTER TRANST - INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW * .. -* .. Local Arrays .. - COMPLEX T( LDT, NBMAX ) -* .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV @@ -247,13 +243,16 @@ END IF * IF( INFO.EQ.0 ) THEN -* -* Determine the block size. NB may be at most NBMAX, where NBMAX -* is used to define the local array T. -* - NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, K, - $ -1 ) ) - LWKOPT = MAX( 1, NW )*NB +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + END IF WORK( 1 ) = LWKOPT END IF * @@ -267,21 +266,19 @@ * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN - WORK( 1 ) = 1 RETURN END IF * +* Determine the block size +* NBMIN = 2 LDWORK = NW IF( NB.GT.1 .AND. NB.LT.K ) THEN - IWS = NW*NB - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK NBMIN = MAX( 2, ILAENV( 2, 'CUNMLQ', SIDE // TRANS, M, N, K, $ -1 ) ) END IF - ELSE - IWS = NW END IF * IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN @@ -294,6 +291,7 @@ * * Use blocked code * + IWT = 1 + NW*NB IF( ( LEFT .AND. NOTRAN ) .OR. $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN I1 = 1 @@ -326,7 +324,7 @@ * H = H(i) H(i+1) . . . H(i+ib-1) * CALL CLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), T, LDT ) + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**H is applied to C(i:m,1:n) @@ -344,8 +342,8 @@ * Apply H or H**H * CALL CLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, - $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, - $ LDWORK ) + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) 10 CONTINUE END IF WORK( 1 ) = LWKOPT |