diff options
author | wernsaar <wernsaar@googlemail.com> | 2013-12-01 13:16:41 +0100 |
---|---|---|
committer | wernsaar <wernsaar@googlemail.com> | 2013-12-01 13:16:41 +0100 |
commit | 4be4db590c14963410508e96833b9b47c6c5d586 (patch) | |
tree | e8fb012d1f5696401ca5de35643eedc5e3b6edfe /lapack | |
parent | 9d3fae15a8dba680c6f3369cd580c59b651201dd (diff) | |
parent | 5048a80032a9d020b585ffc59323274f1d14e6b7 (diff) | |
download | openblas-4be4db590c14963410508e96833b9b47c6c5d586.tar.gz openblas-4be4db590c14963410508e96833b9b47c6c5d586.tar.bz2 openblas-4be4db590c14963410508e96833b9b47c6c5d586.zip |
Merge remote branch 'origin/develop' into armv7
Diffstat (limited to 'lapack')
-rw-r--r-- | lapack/getrf/getrf_parallel.c | 14 | ||||
-rw-r--r-- | lapack/getrf/getrf_parallel_omp.c | 2 | ||||
-rw-r--r-- | lapack/getrf/getrf_single.c | 2 | ||||
-rw-r--r-- | lapack/potrf/potrf_parallel.c | 2 | ||||
-rw-r--r-- | lapack/trtri/Makefile | 4 | ||||
-rw-r--r-- | lapack/trtri/dtrtri_lapack.f | 242 | ||||
-rw-r--r-- | lapack/trtri/trtri_U_single.c | 19 |
7 files changed, 25 insertions, 260 deletions
diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index 21ea9d5f5..3dbc70e9d 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -67,14 +67,14 @@ double sqrt(double); #undef GETRF_FACTOR #define GETRF_FACTOR 1.00 -static inline long FORMULA1(long M, long N, long IS, long BK, long T) { +static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) { double m = (double)(M - IS - BK); double n = (double)(N - IS - BK); double b = (double)BK; double a = (double)T; - return (long)((n + GETRF_FACTOR * m * b * (1. - a) / (b + m)) / a); + return (BLASLONG)((n + GETRF_FACTOR * m * b * (1. - a) / (b + m)) / a); } @@ -111,7 +111,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra if (args -> a == NULL) { TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb); - sbb = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); + sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); } else { sb = (FLOAT *)args -> a; } @@ -221,7 +221,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG * if (args -> a == NULL) { TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb); - sbb = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); + sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); } else { sb = (FLOAT *)args -> a; } @@ -448,7 +448,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, TRSM_ILTCOPY(bk, bk, a, lda, 0, sb); - sbb = (FLOAT *)((((long)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); + sbb = (FLOAT *)((((BLASULONG)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); is = 0; num_cpu = 0; @@ -685,7 +685,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, if (width > n - init_bk) width = n - init_bk; if (width < init_bk) { - long temp; + BLASLONG temp; temp = FORMULA2(m, n, 0, init_bk, args -> nthreads); temp = (temp + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); @@ -708,7 +708,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, is = 0; num_cpu = 0; - sbb = (FLOAT *)((((long)(sb + GEMM_PQ * GEMM_PQ * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); + sbb = (FLOAT *)((((BLASULONG)(sb + GEMM_PQ * GEMM_PQ * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); while (is < mn) { diff --git a/lapack/getrf/getrf_parallel_omp.c b/lapack/getrf/getrf_parallel_omp.c index 4922b9b52..6eda30a52 100644 --- a/lapack/getrf/getrf_parallel_omp.c +++ b/lapack/getrf/getrf_parallel_omp.c @@ -178,7 +178,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, return info; } - sbb = (FLOAT *)((((long)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); + sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); info = 0; diff --git a/lapack/getrf/getrf_single.c b/lapack/getrf/getrf_single.c index fcea0ae89..f1818ea97 100644 --- a/lapack/getrf/getrf_single.c +++ b/lapack/getrf/getrf_single.c @@ -82,7 +82,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, return info; } - sbb = (FLOAT *)((((long)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); + sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); info = 0; diff --git a/lapack/potrf/potrf_parallel.c b/lapack/potrf/potrf_parallel.c index eec9b6e05..11f7f533c 100644 --- a/lapack/potrf/potrf_parallel.c +++ b/lapack/potrf/potrf_parallel.c @@ -185,7 +185,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); - buffer[0] = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); + buffer[0] = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); for (i = 1; i < DIVIDE_RATE; i++) { buffer[i] = buffer[i - 1] + GEMM_Q * div_n * COMPSIZE; } diff --git a/lapack/trtri/Makefile b/lapack/trtri/Makefile index 10d3cb7fd..626c47bbf 100644 --- a/lapack/trtri/Makefile +++ b/lapack/trtri/Makefile @@ -13,7 +13,6 @@ ZBLASOBJS = ztrtri_UU_single.$(SUFFIX) ztrtri_UN_single.$(SUFFIX) ztrtri_LU_sing XBLASOBJS = xtrtri_UU_single.$(SUFFIX) xtrtri_UN_single.$(SUFFIX) xtrtri_LU_single.$(SUFFIX) xtrtri_LN_single.$(SUFFIX) -DBLASOBJS += dtrtri_lapack.$(SUFFIX) ifdef SMP SBLASOBJS += strtri_UU_parallel.$(SUFFIX) strtri_UN_parallel.$(SUFFIX) strtri_LU_parallel.$(SUFFIX) strtri_LN_parallel.$(SUFFIX) @@ -54,9 +53,6 @@ dtrtri_UU_single.$(SUFFIX) : trtri_U_single.c dtrtri_UN_single.$(SUFFIX) : trtri_U_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUNIT $< -o $(@F) -dtrtri_lapack.$(SUFFIX) : dtrtri_lapack.f - $(FC) -c $(FFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F) - dtrtri_LU_single.$(SUFFIX) : trtri_L_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F) diff --git a/lapack/trtri/dtrtri_lapack.f b/lapack/trtri/dtrtri_lapack.f deleted file mode 100644 index 8e9a08170..000000000 --- a/lapack/trtri/dtrtri_lapack.f +++ /dev/null @@ -1,242 +0,0 @@ -*> \brief \b DTRTRI -* -* =========== DOCUMENTATION =========== -* -* Online html documentation available at -* http://www.netlib.org/lapack/explore-html/ -* -*> \htmlonly -*> Download DTRTRI + dependencies -*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrtri.f"> -*> [TGZ]</a> -*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrtri.f"> -*> [ZIP]</a> -*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrtri.f"> -*> [TXT]</a> -*> \endhtmlonly -* -* Definition: -* =========== -* -* SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) -* -* .. Scalar Arguments .. -* CHARACTER DIAG, UPLO -* INTEGER INFO, LDA, N -* .. -* .. Array Arguments .. -* DOUBLE PRECISION A( LDA, * ) -* .. -* -* -*> \par Purpose: -* ============= -*> -*> \verbatim -*> -*> DTRTRI computes the inverse of a real upper or lower triangular -*> matrix A. -*> -*> This is the Level 3 BLAS version of the algorithm. -*> \endverbatim -* -* Arguments: -* ========== -* -*> \param[in] UPLO -*> \verbatim -*> UPLO is CHARACTER*1 -*> = 'U': A is upper triangular; -*> = 'L': A is lower triangular. -*> \endverbatim -*> -*> \param[in] DIAG -*> \verbatim -*> DIAG is CHARACTER*1 -*> = 'N': A is non-unit triangular; -*> = 'U': A is unit triangular. -*> \endverbatim -*> -*> \param[in] N -*> \verbatim -*> N is INTEGER -*> The order of the matrix A. N >= 0. -*> \endverbatim -*> -*> \param[in,out] A -*> \verbatim -*> A is DOUBLE PRECISION array, dimension (LDA,N) -*> On entry, the triangular matrix A. If UPLO = 'U', the -*> leading N-by-N upper triangular part of the array A contains -*> the upper triangular matrix, and the strictly lower -*> triangular part of A is not referenced. If UPLO = 'L', the -*> leading N-by-N lower triangular part of the array A contains -*> the lower triangular matrix, and the strictly upper -*> triangular part of A is not referenced. If DIAG = 'U', the -*> diagonal elements of A are also not referenced and are -*> assumed to be 1. -*> On exit, the (triangular) inverse of the original matrix, in -*> the same storage format. -*> \endverbatim -*> -*> \param[in] LDA -*> \verbatim -*> LDA is INTEGER -*> The leading dimension of the array A. LDA >= max(1,N). -*> \endverbatim -*> -*> \param[out] INFO -*> \verbatim -*> INFO is INTEGER -*> = 0: successful exit -*> < 0: if INFO = -i, the i-th argument had an illegal value -*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular -*> matrix is singular and its inverse can not be computed. -*> \endverbatim -* -* Authors: -* ======== -* -*> \author Univ. of Tennessee -*> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver -*> \author NAG Ltd. -* -*> \date November 2011 -* -*> \ingroup doubleOTHERcomputational -* -* ===================================================================== - SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO ) -* -* -- LAPACK computational routine (version 3.4.0) -- -* -- LAPACK is a software package provided by Univ. of Tennessee, -- -* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2011 -* -* .. Scalar Arguments .. - CHARACTER DIAG, UPLO - INTEGER INFO, LDA, N -* .. -* .. Array Arguments .. - DOUBLE PRECISION A( LDA, * ) -* .. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ONE, ZERO - PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) -* .. -* .. Local Scalars .. - LOGICAL NOUNIT, UPPER - INTEGER J, JB, NB, NN -* .. -* .. External Functions .. - LOGICAL LSAME - INTEGER ILAENV - EXTERNAL LSAME, ILAENV -* .. -* .. External Subroutines .. - EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC MAX, MIN -* .. -* .. Executable Statements .. -* -* Test the input parameters. -* - INFO = 0 - UPPER = LSAME( UPLO, 'U' ) - NOUNIT = LSAME( DIAG, 'N' ) - IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN - INFO = -1 - ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN - INFO = -2 - ELSE IF( N.LT.0 ) THEN - INFO = -3 - ELSE IF( LDA.LT.MAX( 1, N ) ) THEN - INFO = -5 - END IF - IF( INFO.NE.0 ) THEN - CALL XERBLA( 'DTRTRI', -INFO ) - RETURN - END IF -* -* Quick return if possible -* - IF( N.EQ.0 ) - $ RETURN -* -* Check for singularity if non-unit. -* - IF( NOUNIT ) THEN - DO 10 INFO = 1, N - IF( A( INFO, INFO ).EQ.ZERO ) - $ RETURN - 10 CONTINUE - INFO = 0 - END IF -* -* Determine the block size for this environment. -* - NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 ) - IF( NB.LE.1 .OR. NB.GE.N ) THEN -* -* Use unblocked code -* - CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) - ELSE -* -* Use blocked code -* - IF( UPPER ) THEN -* -* Compute inverse of upper triangular matrix -* - DO 20 J = 1, N, NB - JB = MIN( NB, N-J+1 ) -* -* Compute rows 1:j-1 of current block column -* - CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, - $ JB, ONE, A, LDA, A( 1, J ), LDA ) - CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, - $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) -* -* Compute inverse of current diagonal block -* - CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) - 20 CONTINUE - ELSE -* -* Compute inverse of lower triangular matrix -* - NN = ( ( N-1 ) / NB )*NB + 1 - DO 30 J = NN, 1, -NB - JB = MIN( NB, N-J+1 ) - IF( J+JB.LE.N ) THEN -* -* Compute rows j+jb:n of current block column -* - CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG, - $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, - $ A( J+JB, J ), LDA ) - CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG, - $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, - $ A( J+JB, J ), LDA ) - END IF -* -* Compute inverse of current diagonal block -* - CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) - 30 CONTINUE - END IF - END IF -* - RETURN -* -* End of DTRTRI -* - END diff --git a/lapack/trtri/trtri_U_single.c b/lapack/trtri/trtri_U_single.c index 72133d896..c79281cfb 100644 --- a/lapack/trtri/trtri_U_single.c +++ b/lapack/trtri/trtri_U_single.c @@ -127,8 +127,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, if (min_i > GEMM_P) min_i = GEMM_P; if (ls == i + bk) { - NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); - + //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); + + GEMM_BETA(min_i, bk, 0, dm1, +#ifdef COMPLEX + ZERO, +#endif + NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda); + TRSM_KERNEL_RN(min_i, bk, bk, dm1, #ifdef COMPLEX ZERO, @@ -171,8 +177,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, min_i = i - is; if (min_i > GEMM_P) min_i = GEMM_P; - NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); - + //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); + GEMM_BETA(min_i, bk, 0, dm1, +#ifdef COMPLEX + ZERO, +#endif + NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda); + TRSM_KERNEL_RN(min_i, bk, bk, dm1, #ifdef COMPLEX ZERO, |