diff options
author | Zhang Xianyi <traits.zhang@gmail.com> | 2013-11-07 01:08:39 +0800 |
---|---|---|
committer | Zhang Xianyi <traits.zhang@gmail.com> | 2013-11-07 01:08:39 +0800 |
commit | 73770e60b898cabc6d0d6ff10f99aba9baa56f9b (patch) | |
tree | 0ce897756e6dc5817045e65c0cecaf6fe7a26ff8 /lapack | |
parent | 6d9d70c55c5755d3ca666d92f626539c23063dd6 (diff) | |
download | openblas-73770e60b898cabc6d0d6ff10f99aba9baa56f9b.tar.gz openblas-73770e60b898cabc6d0d6ff10f99aba9baa56f9b.tar.bz2 openblas-73770e60b898cabc6d0d6ff10f99aba9baa56f9b.zip |
Refs #309. Fixed trtri_U single thread computational bug.
Diffstat (limited to 'lapack')
-rw-r--r-- | lapack/trtri/Makefile | 4 | ||||
-rw-r--r-- | lapack/trtri/dtrtri_lapack.f | 242 | ||||
-rw-r--r-- | lapack/trtri/trtri_U_single.c | 19 |
3 files changed, 15 insertions, 250 deletions
diff --git a/lapack/trtri/Makefile b/lapack/trtri/Makefile index 10d3cb7fd..626c47bbf 100644 --- a/lapack/trtri/Makefile +++ b/lapack/trtri/Makefile @@ -13,7 +13,6 @@ ZBLASOBJS = ztrtri_UU_single.$(SUFFIX) ztrtri_UN_single.$(SUFFIX) ztrtri_LU_sing XBLASOBJS = xtrtri_UU_single.$(SUFFIX) xtrtri_UN_single.$(SUFFIX) xtrtri_LU_single.$(SUFFIX) xtrtri_LN_single.$(SUFFIX) -DBLASOBJS += dtrtri_lapack.$(SUFFIX) ifdef SMP SBLASOBJS += strtri_UU_parallel.$(SUFFIX) strtri_UN_parallel.$(SUFFIX) strtri_LU_parallel.$(SUFFIX) strtri_LN_parallel.$(SUFFIX) @@ -54,9 +53,6 @@ dtrtri_UU_single.$(SUFFIX) : trtri_U_single.c dtrtri_UN_single.$(SUFFIX) : trtri_U_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUNIT $< -o $(@F) -dtrtri_lapack.$(SUFFIX) : dtrtri_lapack.f - $(FC) -c $(FFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F) - dtrtri_LU_single.$(SUFFIX) : trtri_L_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F) diff --git a/lapack/trtri/dtrtri_lapack.f b/lapack/trtri/dtrtri_lapack.f deleted file mode 100644 index 8e9a08170..000000000 --- a/lapack/trtri/dtrtri_lapack.f +++ /dev/null @@ -1,242 +0,0 @@ -*> \brief \b DTRTRI -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download DTRTRI + dependencies -*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrtri.f"> -*> [TGZ]</a> -*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrtri.f"> -*> [ZIP]</a> -*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrtri.f"> -*> [TXT]</a> -*> \endhtmlonly -* -* Definition: -* =========== -* -* SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) -* -* .. Scalar Arguments .. -* CHARACTER DIAG, UPLO -* INTEGER INFO, LDA, N -* .. -* .. Array Arguments .. -* DOUBLE PRECISION A( LDA, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> DTRTRI computes the inverse of a real upper or lower triangular -*> matrix A. -*> -*> This is the Level 3 BLAS version of the algorithm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> = 'U': A is upper triangular; -*> = 'L': A is lower triangular. -*> \endverbatim -*> -*> \param[in] DIAG -*> \verbatim -*> DIAG is CHARACTER*1 -*> = 'N': A is non-unit triangular; -*> = 'U': A is unit triangular. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. -*> \endverbatim -*> -*> \param[in,out] A -*> \verbatim -*> A is DOUBLE PRECISION array, dimension (LDA,N) -*> On entry, the triangular matrix A. If UPLO = 'U', the -*> leading N-by-N upper triangular part of the array A contains -*> the upper triangular matrix, and the strictly lower -*> triangular part of A is not referenced. If UPLO = 'L', the -*> leading N-by-N lower triangular part of the array A contains -*> the lower triangular matrix, and the strictly upper -*> triangular part of A is not referenced. If DIAG = 'U', the -*> diagonal elements of A are also not referenced and are -*> assumed to be 1. -*> On exit, the (triangular) inverse of the original matrix, in -*> the same storage format. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). -*> \endverbatim -*> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular -*> matrix is singular and its inverse can not be computed. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup doubleOTHERcomputational -* -* ===================================================================== - SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO ) -* -* -- LAPACK computational routine (version 3.4.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - CHARACTER DIAG, UPLO - INTEGER INFO, LDA, N -* .. -* .. Array Arguments .. - DOUBLE PRECISION A( LDA, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ONE, ZERO - PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) -* .. -* .. Local Scalars .. - LOGICAL NOUNIT, UPPER - INTEGER J, JB, NB, NN -* .. -* .. External Functions .. - LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV -* .. -* .. External Subroutines .. - EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC MAX, MIN -* .. -* .. Executable Statements .. -* -* Test the input parameters. -* - INFO = 0 - UPPER = LSAME( UPLO, 'U' ) - NOUNIT = LSAME( DIAG, 'N' ) - IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN - INFO = -1 - ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN - INFO = -2 - ELSE IF( N.LT.0 ) THEN - INFO = -3 - ELSE IF( LDA.LT.MAX( 1, N ) ) THEN - INFO = -5 - END IF - IF( INFO.NE.0 ) THEN - CALL XERBLA( 'DTRTRI', -INFO ) - RETURN - END IF -* -* Quick return if possible -* - IF( N.EQ.0 ) - $ RETURN -* -* Check for singularity if non-unit. -* - IF( NOUNIT ) THEN - DO 10 INFO = 1, N - IF( A( INFO, INFO ).EQ.ZERO ) - $ RETURN - 10 CONTINUE - INFO = 0 - END IF -* -* Determine the block size for this environment. -* - NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 ) - IF( NB.LE.1 .OR. NB.GE.N ) THEN -* -* Use unblocked code -* - CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) - ELSE -* -* Use blocked code -* - IF( UPPER ) THEN -* -* Compute inverse of upper triangular matrix -* - DO 20 J = 1, N, NB - JB = MIN( NB, N-J+1 ) -* -* Compute rows 1:j-1 of current block column -* - CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, - $ JB, ONE, A, LDA, A( 1, J ), LDA ) - CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, - $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) -* -* Compute inverse of current diagonal block -* - CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) - 20 CONTINUE - ELSE -* -* Compute inverse of lower triangular matrix -* - NN = ( ( N-1 ) / NB )*NB + 1 - DO 30 J = NN, 1, -NB - JB = MIN( NB, N-J+1 ) - IF( J+JB.LE.N ) THEN -* -* Compute rows j+jb:n of current block column -* - CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG, - $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, - $ A( J+JB, J ), LDA ) - CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG, - $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, - $ A( J+JB, J ), LDA ) - END IF -* -* Compute inverse of current diagonal block -* - CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) - 30 CONTINUE - END IF - END IF -* - RETURN -* -* End of DTRTRI -* - END diff --git a/lapack/trtri/trtri_U_single.c b/lapack/trtri/trtri_U_single.c index 72133d896..c79281cfb 100644 --- a/lapack/trtri/trtri_U_single.c +++ b/lapack/trtri/trtri_U_single.c @@ -127,8 +127,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, if (min_i > GEMM_P) min_i = GEMM_P; if (ls == i + bk) { - NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); - + //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); + + GEMM_BETA(min_i, bk, 0, dm1, +#ifdef COMPLEX + ZERO, +#endif + NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda); + TRSM_KERNEL_RN(min_i, bk, bk, dm1, #ifdef COMPLEX ZERO, @@ -171,8 +177,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, min_i = i - is; if (min_i > GEMM_P) min_i = GEMM_P; - NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); - + //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); + GEMM_BETA(min_i, bk, 0, dm1, +#ifdef COMPLEX + ZERO, +#endif + NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda); + TRSM_KERNEL_RN(min_i, bk, bk, dm1, #ifdef COMPLEX ZERO, |