summaryrefslogtreecommitdiff
path: root/SRC/dormql.f
diff options
context:
space:
mode:
authorphilippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971>2015-05-14 18:50:57 +0000
committerphilippe.theveny <philippe.theveny@8a072113-8704-0410-8d35-dd094bca7971>2015-05-14 18:50:57 +0000
commit34420f19e306e8fe09664d3bebce237d5f2ab92f (patch)
tree91f3e1d80220648d207ed4fd769c55f819fe3e4f /SRC/dormql.f
parent6eff56f7e1ac1d65b85726164a3fa5d4f5a7f1d5 (diff)
downloadlapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.gz
lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.tar.bz2
lapack-34420f19e306e8fe09664d3bebce237d5f2ab92f.zip
This partially fixes bug 061 reported by Victor Liu.
Some compilers place local arrays in static storage rather than on the stack when their size is above a particular threshold. Because static storage is shared by every invocation, this leads to wrong results when multiple threads call the same routine concurrently. The fix consists of moving such arrays into a larger caller-supplied workspace, as proposed by Victor Liu (Tue Nov 13, 2012). Some routines still have large local arrays and cannot be fixed that way because they have no workspace parameter: xGBTRF, xPBTRF, and xHSEQR.
Diffstat (limited to 'SRC/dormql.f')
-rw-r--r--SRC/dormql.f43
1 files changed, 17 insertions, 26 deletions
diff --git a/SRC/dormql.f b/SRC/dormql.f
index 96c6f195..512f234e 100644
--- a/SRC/dormql.f
+++ b/SRC/dormql.f
@@ -136,9 +136,7 @@
*> The dimension of the array WORK.
*> If SIDE = 'L', LWORK >= max(1,N);
*> if SIDE = 'R', LWORK >= max(1,M).
-*> For optimum performance LWORK >= N*NB if SIDE = 'L', and
-*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal
-*> blocksize.
+*> For good performance, LWORK should generally be larger.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal size of the WORK array, returns
@@ -185,17 +183,15 @@
* =====================================================================
*
* .. Parameters ..
- INTEGER NBMAX, LDT
- PARAMETER ( NBMAX = 64, LDT = NBMAX+1 )
+ INTEGER NBMAX, LDT, TSIZE
+ PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
+ $ TSIZE = LDT*NBMAX )
* ..
* .. Local Scalars ..
LOGICAL LEFT, LQUERY, NOTRAN
- INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+ INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT,
$ MI, NB, NBMIN, NI, NQ, NW
* ..
-* .. Local Arrays ..
- DOUBLE PRECISION T( LDT, NBMAX )
-* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
@@ -239,25 +235,22 @@
INFO = -7
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -10
+ ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
+ INFO = -12
END IF
*
IF( INFO.EQ.0 ) THEN
+*
+* Compute the workspace requirements
+*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
LWKOPT = 1
ELSE
-*
-* Determine the block size. NB may be at most NBMAX, where
-* NBMAX is used to define the local array T.
-*
NB = MIN( NBMAX, ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N,
$ K, -1 ) )
- LWKOPT = NW*NB
+ LWKOPT = NW*NB + TSIZE
END IF
WORK( 1 ) = LWKOPT
-*
- IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN
- INFO = -12
- END IF
END IF
*
IF( INFO.NE.0 ) THEN
@@ -276,14 +269,11 @@
NBMIN = 2
LDWORK = NW
IF( NB.GT.1 .AND. NB.LT.K ) THEN
- IWS = NW*NB
- IF( LWORK.LT.IWS ) THEN
- NB = LWORK / LDWORK
+ IF( LWORK.LT.NW*NB+TSIZE ) THEN
+ NB = (LWORK-TSIZE) / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DORMQL', SIDE // TRANS, M, N, K,
$ -1 ) )
END IF
- ELSE
- IWS = NW
END IF
*
IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
@@ -296,6 +286,7 @@
*
* Use blocked code
*
+ IWT = 1 + NW*NB
IF( ( LEFT .AND. NOTRAN ) .OR.
$ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
I1 = 1
@@ -320,7 +311,7 @@
* H = H(i+ib-1) . . . H(i+1) H(i)
*
CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
- $ A( 1, I ), LDA, TAU( I ), T, LDT )
+ $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT )
IF( LEFT ) THEN
*
* H or H**T is applied to C(1:m-k+i+ib-1,1:n)
@@ -336,8 +327,8 @@
* Apply H or H**T
*
CALL DLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
- $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
- $ LDWORK )
+ $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC,
+ $ WORK, LDWORK )
10 CONTINUE
END IF
WORK( 1 ) = LWKOPT