summaryrefslogtreecommitdiff
path: root/lapack
diff options
context:
space:
mode:
authorZhang Xianyi <traits.zhang@gmail.com>2013-11-07 01:08:39 +0800
committerZhang Xianyi <traits.zhang@gmail.com>2013-11-07 01:08:39 +0800
commit73770e60b898cabc6d0d6ff10f99aba9baa56f9b (patch)
tree0ce897756e6dc5817045e65c0cecaf6fe7a26ff8 /lapack
parent6d9d70c55c5755d3ca666d92f626539c23063dd6 (diff)
downloadopenblas-73770e60b898cabc6d0d6ff10f99aba9baa56f9b.tar.gz
openblas-73770e60b898cabc6d0d6ff10f99aba9baa56f9b.tar.bz2
openblas-73770e60b898cabc6d0d6ff10f99aba9baa56f9b.zip
Refs #309. Fixed trtri_U single thread computational bug.
Diffstat (limited to 'lapack')
-rw-r--r--lapack/trtri/Makefile4
-rw-r--r--lapack/trtri/dtrtri_lapack.f242
-rw-r--r--lapack/trtri/trtri_U_single.c19
3 files changed, 15 insertions, 250 deletions
diff --git a/lapack/trtri/Makefile b/lapack/trtri/Makefile
index 10d3cb7fd..626c47bbf 100644
--- a/lapack/trtri/Makefile
+++ b/lapack/trtri/Makefile
@@ -13,7 +13,6 @@ ZBLASOBJS = ztrtri_UU_single.$(SUFFIX) ztrtri_UN_single.$(SUFFIX) ztrtri_LU_sing
XBLASOBJS = xtrtri_UU_single.$(SUFFIX) xtrtri_UN_single.$(SUFFIX) xtrtri_LU_single.$(SUFFIX) xtrtri_LN_single.$(SUFFIX)
-DBLASOBJS += dtrtri_lapack.$(SUFFIX)
ifdef SMP
SBLASOBJS += strtri_UU_parallel.$(SUFFIX) strtri_UN_parallel.$(SUFFIX) strtri_LU_parallel.$(SUFFIX) strtri_LN_parallel.$(SUFFIX)
@@ -54,9 +53,6 @@ dtrtri_UU_single.$(SUFFIX) : trtri_U_single.c
dtrtri_UN_single.$(SUFFIX) : trtri_U_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUNIT $< -o $(@F)
-dtrtri_lapack.$(SUFFIX) : dtrtri_lapack.f
- $(FC) -c $(FFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F)
-
dtrtri_LU_single.$(SUFFIX) : trtri_L_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F)
diff --git a/lapack/trtri/dtrtri_lapack.f b/lapack/trtri/dtrtri_lapack.f
deleted file mode 100644
index 8e9a08170..000000000
--- a/lapack/trtri/dtrtri_lapack.f
+++ /dev/null
@@ -1,242 +0,0 @@
-*> \brief \b DTRTRI
-*
-* =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-* http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DTRTRI + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrtri.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrtri.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrtri.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-* Definition:
-* ===========
-*
-* SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO )
-*
-* .. Scalar Arguments ..
-* CHARACTER DIAG, UPLO
-* INTEGER INFO, LDA, N
-* ..
-* .. Array Arguments ..
-* DOUBLE PRECISION A( LDA, * )
-* ..
-*
-*
-*> \par Purpose:
-* =============
-*>
-*> \verbatim
-*>
-*> DTRTRI computes the inverse of a real upper or lower triangular
-*> matrix A.
-*>
-*> This is the Level 3 BLAS version of the algorithm.
-*> \endverbatim
-*
-* Arguments:
-* ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*> UPLO is CHARACTER*1
-*> = 'U': A is upper triangular;
-*> = 'L': A is lower triangular.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*> DIAG is CHARACTER*1
-*> = 'N': A is non-unit triangular;
-*> = 'U': A is unit triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*> N is INTEGER
-*> The order of the matrix A. N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*> A is DOUBLE PRECISION array, dimension (LDA,N)
-*> On entry, the triangular matrix A. If UPLO = 'U', the
-*> leading N-by-N upper triangular part of the array A contains
-*> the upper triangular matrix, and the strictly lower
-*> triangular part of A is not referenced. If UPLO = 'L', the
-*> leading N-by-N lower triangular part of the array A contains
-*> the lower triangular matrix, and the strictly upper
-*> triangular part of A is not referenced. If DIAG = 'U', the
-*> diagonal elements of A are also not referenced and are
-*> assumed to be 1.
-*> On exit, the (triangular) inverse of the original matrix, in
-*> the same storage format.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*> LDA is INTEGER
-*> The leading dimension of the array A. LDA >= max(1,N).
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*> INFO is INTEGER
-*> = 0: successful exit
-*> < 0: if INFO = -i, the i-th argument had an illegal value
-*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular
-*> matrix is singular and its inverse can not be computed.
-*> \endverbatim
-*
-* Authors:
-* ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date November 2011
-*
-*> \ingroup doubleOTHERcomputational
-*
-* =====================================================================
- SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO )
-*
-* -- LAPACK computational routine (version 3.4.0) --
-* -- LAPACK is a software package provided by Univ. of Tennessee, --
-* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-* November 2011
-*
-* .. Scalar Arguments ..
- CHARACTER DIAG, UPLO
- INTEGER INFO, LDA, N
-* ..
-* .. Array Arguments ..
- DOUBLE PRECISION A( LDA, * )
-* ..
-*
-* =====================================================================
-*
-* .. Parameters ..
- DOUBLE PRECISION ONE, ZERO
- PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
-* ..
-* .. Local Scalars ..
- LOGICAL NOUNIT, UPPER
- INTEGER J, JB, NB, NN
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- INTEGER ILAENV
- EXTERNAL LSAME, ILAENV
-* ..
-* .. External Subroutines ..
- EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC MAX, MIN
-* ..
-* .. Executable Statements ..
-*
-* Test the input parameters.
-*
- INFO = 0
- UPPER = LSAME( UPLO, 'U' )
- NOUNIT = LSAME( DIAG, 'N' )
- IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
- INFO = -1
- ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
- INFO = -2
- ELSE IF( N.LT.0 ) THEN
- INFO = -3
- ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
- INFO = -5
- END IF
- IF( INFO.NE.0 ) THEN
- CALL XERBLA( 'DTRTRI', -INFO )
- RETURN
- END IF
-*
-* Quick return if possible
-*
- IF( N.EQ.0 )
- $ RETURN
-*
-* Check for singularity if non-unit.
-*
- IF( NOUNIT ) THEN
- DO 10 INFO = 1, N
- IF( A( INFO, INFO ).EQ.ZERO )
- $ RETURN
- 10 CONTINUE
- INFO = 0
- END IF
-*
-* Determine the block size for this environment.
-*
- NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 )
- IF( NB.LE.1 .OR. NB.GE.N ) THEN
-*
-* Use unblocked code
-*
- CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO )
- ELSE
-*
-* Use blocked code
-*
- IF( UPPER ) THEN
-*
-* Compute inverse of upper triangular matrix
-*
- DO 20 J = 1, N, NB
- JB = MIN( NB, N-J+1 )
-*
-* Compute rows 1:j-1 of current block column
-*
- CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1,
- $ JB, ONE, A, LDA, A( 1, J ), LDA )
- CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1,
- $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA )
-*
-* Compute inverse of current diagonal block
-*
- CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO )
- 20 CONTINUE
- ELSE
-*
-* Compute inverse of lower triangular matrix
-*
- NN = ( ( N-1 ) / NB )*NB + 1
- DO 30 J = NN, 1, -NB
- JB = MIN( NB, N-J+1 )
- IF( J+JB.LE.N ) THEN
-*
-* Compute rows j+jb:n of current block column
-*
- CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG,
- $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA,
- $ A( J+JB, J ), LDA )
- CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG,
- $ N-J-JB+1, JB, -ONE, A( J, J ), LDA,
- $ A( J+JB, J ), LDA )
- END IF
-*
-* Compute inverse of current diagonal block
-*
- CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO )
- 30 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of DTRTRI
-*
- END
diff --git a/lapack/trtri/trtri_U_single.c b/lapack/trtri/trtri_U_single.c
index 72133d896..c79281cfb 100644
--- a/lapack/trtri/trtri_U_single.c
+++ b/lapack/trtri/trtri_U_single.c
@@ -127,8 +127,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (min_i > GEMM_P) min_i = GEMM_P;
if (ls == i + bk) {
- NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
-
+ //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
+
+ GEMM_BETA(min_i, bk, 0, dm1,
+#ifdef COMPLEX
+ ZERO,
+#endif
+ NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
+
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,
@@ -171,8 +177,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
min_i = i - is;
if (min_i > GEMM_P) min_i = GEMM_P;
- NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
-
+ //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
+ GEMM_BETA(min_i, bk, 0, dm1,
+#ifdef COMPLEX
+ ZERO,
+#endif
+ NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
+
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,