diff options
author | philippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971> | 2015-05-14 18:50:57 +0000 |
---|---|---|
committer | philippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971> | 2015-05-14 18:50:57 +0000 |
commit | 34420f19e306e8fe09664d3bebce237d5f2ab92f (patch) | |
tree | 91f3e1d80220648d207ed4fd769c55f819fe3e4f /SRC/cgehrd.f | |
parent | 6eff56f7e1ac1d65b85726164a3fa5d4f5a7f1d5 (diff) | |
download | lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.gz lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.bz2 lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.zip |
This partially fixes bug 061 reported by Victor Liu.
Some compilers allocate local arrays on the heap when their size is
above a particular threshold.
This leads to wrong results when multiple threads call the same routine.
The bug fix consists in using a larger workspace, as proposed by Victor Liu
(Tue Nov 13, 2012).
Some routines still have large local arrays and cannot be fixed that way
because they have no workspace parameter: xGBTRF, xBPTRF, and xHSEQR.
Diffstat (limited to 'SRC/cgehrd.f')
-rw-r--r-- | SRC/cgehrd.f | 46 |
1 files changed, 25 insertions, 21 deletions
diff --git a/SRC/cgehrd.f b/SRC/cgehrd.f index b590b3c1..4a6e20dd 100644 --- a/SRC/cgehrd.f +++ b/SRC/cgehrd.f @@ -97,8 +97,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= max(1,N). -*> For optimum performance LWORK >= N*NB, where NB is the -*> optimal blocksize. +*> For good performance, LWORK should generally be larger. *> *> If LWORK = -1, then a workspace query is assumed; the routine *> only calculates the optimal size of the WORK array, returns @@ -183,21 +182,19 @@ * ===================================================================== * * .. Parameters .. - INTEGER NBMAX, LDT - PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), $ ONE = ( 1.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. LOGICAL LQUERY - INTEGER I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB, + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, $ NBMIN, NH, NX COMPLEX EI * .. -* .. Local Arrays .. - COMPLEX T( LDT, NBMAX ) -* .. * .. External Subroutines .. EXTERNAL CAXPY, CGEHD2, CGEMM, CLAHR2, CLARFB, CTRMM, $ XERBLA @@ -214,9 +211,6 @@ * Test the input parameters * INFO = 0 - NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( N.LT.0 ) THEN INFO = -1 @@ -229,6 +223,16 @@ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -8 END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* IF( INFO.NE.0 ) THEN CALL XERBLA( 'CGEHRD', -INFO ) RETURN @@ -257,7 +261,6 @@ * NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) NBMIN = 2 - IWS = 1 IF( NB.GT.1 .AND. NB.LT.NH ) THEN * * Determine when to cross over from blocked to unblocked code @@ -268,8 +271,7 @@ * * Determine if workspace is large enough for blocked code * - IWS = N*NB - IF( LWORK.LT.IWS ) THEN + IF( LWORK.LT.N*NB+TSIZE ) THEN * * Not enough workspace to use optimal NB: determine the * minimum value of NB, and reduce NB or force use of @@ -277,8 +279,8 @@ * NBMIN = MAX( 2, ILAENV( 2, 'CGEHRD', ' ', N, ILO, IHI, $ -1 ) ) - IF( LWORK.GE.N*NBMIN ) THEN - NB = LWORK / N + IF( LWORK.GE.(N*NBMIN+TSIZE) ) THEN + NB = (LWORK-TSIZE) / N ELSE NB = 1 END IF @@ -297,6 +299,7 @@ * * Use blocked code * + IWT = 1 + N*NB DO 40 I = ILO, IHI - 1 - NX, NB IB = MIN( NB, IHI-I ) * @@ -304,8 +307,8 @@ * matrices V and T of the block reflector H = I - V*T*V**H * which performs the reduction, and also the matrix Y = A*V*T * - CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, - $ WORK, LDWORK ) + CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) * * Apply the block reflector H to A(1:ihi,i+ib:ihi) from the * right, computing A := A - Y * V**H. V(i+ib,ib-1) must be set @@ -335,15 +338,16 @@ * CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', $ 'Columnwise', - $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, - $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) 40 CONTINUE END IF * * Use unblocked code to reduce the rest of the matrix * CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) - WORK( 1 ) = IWS + WORK( 1 ) = LWKOPT * RETURN * |