summaryrefslogtreecommitdiff
path: root/SRC/cunmlq.f
diff options
context:
space:
mode:
authorphilippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971>2015-05-14 18:50:57 +0000
committerphilippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971>2015-05-14 18:50:57 +0000
commit34420f19e306e8fe09664d3bebce237d5f2ab92f (patch)
tree91f3e1d80220648d207ed4fd769c55f819fe3e4f /SRC/cunmlq.f
parent6eff56f7e1ac1d65b85726164a3fa5d4f5a7f1d5 (diff)
downloadlapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.gz
lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.bz2
lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.zip
This partially fixes bug 061 reported by Victor Liu.
Some compilers allocate local arrays on the heap when their size is above a particular threshold. This leads to wrong results when multiple threads call the same routine. The bug fix consists in using a larger workspace, as proposed by Victor Liu (Tue Nov 13, 2012). Some routines still have large local arrays and cannot be fixed that way because they have no workspace parameter: xGBTRF, xBPTRF, and xHSEQR.
Diffstat (limited to 'SRC/cunmlq.f')
-rw-r--r--SRC/cunmlq.f48
1 files changed, 23 insertions, 25 deletions
diff --git a/SRC/cunmlq.f b/SRC/cunmlq.f
index 7770a6d4..7a5e3718 100644
--- a/SRC/cunmlq.f
+++ b/SRC/cunmlq.f
@@ -137,9 +137,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -187,18 +185,16 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
CHARACTER TRANST
- INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+ INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK,
$ LWKOPT, MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- COMPLEX T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -247,13 +243,16 @@
END IF
*
IF( INFO.EQ.0 ) THEN
-*
-* Determine the block size. NB may be at most NBMAX, where NBMAX
-* is used to define the local array T.
-*
- NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, K,
- $ -1 ) )
- LWKOPT = MAX( 1, NW )*NB
+*
+* Compute the workspace requirements
+*
+ IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+ LWKOPT = 1
+ ELSE
+ NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N,
+ $ K, -1 ) )
+ LWKOPT = MAX( 1, NW )*NB + TSIZE
+ END IF
WORK( 1 ) = LWKOPT
END IF
*
@@ -267,21 +266,19 @@
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
- WORK( 1 ) = 1
RETURN
END IF
*
+* Determine the block size
+*
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'CUNMLQ', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -294,6 +291,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -326,7 +324,7 @@
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL CLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
- $ LDA, TAU( I ), T, LDT )
+ $ LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**H is applied to C(i:m,1:n)
@@ -344,8 +342,8 @@
* Apply H or H**H
*
CALL CLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
- $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
- $ LDWORK )
+ $ A( I, I ), LDA, WORK( IWT ), LDT,
+ $ C( IC, JC ), LDC, WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT