diff options
author | Zhang Xianyi <traits.zhang@gmail.com> | 2014-07-09 08:48:00 +0800 |
---|---|---|
committer | Zhang Xianyi <traits.zhang@gmail.com> | 2014-07-09 08:48:00 +0800 |
commit | f9991fd5f627569827f73c5be46addfaf8ced6a5 (patch) | |
tree | 6e586d1588cbddbc1465036aa72ef89ba0f480d5 | |
parent | da3d70420ae5a1795fda107d89efdaed5834ff94 (diff) | |
parent | 47688e24e99079191488b2fc57248f92753e0611 (diff) | |
download | openblas-0.2.10.rc2.tar.gz openblas-0.2.10.rc2.tar.bz2 openblas-0.2.10.rc2.zip |
Merge branch 'develop' (tag: v0.2.10.rc2)
-rw-r--r-- | Makefile | 8 | ||||
-rw-r--r-- | Makefile.rule | 2 | ||||
-rw-r--r-- | cblas.h | 3 | ||||
-rw-r--r-- | driver/level3/Makefile | 10 | ||||
-rw-r--r-- | driver/others/openblas_get_config.c | 25 | ||||
-rw-r--r-- | driver/others/parameter.c | 3 | ||||
-rw-r--r-- | exports/gensymbol | 3 | ||||
-rw-r--r-- | getarch.c | 9 | ||||
-rw-r--r-- | interface/Makefile | 8 | ||||
-rw-r--r-- | interface/ger.c | 14 | ||||
-rw-r--r-- | interface/sbmv.c | 18 | ||||
-rw-r--r-- | interface/zger.c | 15 | ||||
-rw-r--r-- | interface/zsbmv.c | 16 | ||||
-rw-r--r-- | kernel/Makefile.L3 | 6 | ||||
-rw-r--r-- | kernel/generic/dot.c | 104 | ||||
-rw-r--r-- | kernel/setparam-ref.c | 29 | ||||
-rw-r--r-- | kernel/x86_64/KERNEL | 6 | ||||
-rw-r--r-- | kernel/x86_64/KERNEL.BULLDOZER | 1 | ||||
-rw-r--r-- | kernel/x86_64/KERNEL.PILEDRIVER | 1 | ||||
-rw-r--r-- | kernel/x86_64/KERNEL.PRESCOTT | 2 | ||||
-rw-r--r-- | make.inc | 1 | ||||
-rw-r--r-- | utest/Makefile | 8 |
22 files changed, 246 insertions, 46 deletions
@@ -247,10 +247,16 @@ ifndef NOFORTRAN -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -ifeq ($(F_COMPILER), GFORTRAN) +ifeq ($(FC), GFORTRAN) -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc +ifdef SMP + -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc +else + -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc +endif else -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc endif -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc endif diff --git a/Makefile.rule b/Makefile.rule index 90ff80ef8..f04591907 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.10.rc1 +VERSION = 0.2.10.rc2 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library @@ -16,6 +16,9 @@ void goto_set_num_threads(int num_threads); /*Get the build configure on runtime.*/ char* openblas_get_config(void); +/*Get the CPU corename on runtime.*/ +char* openblas_get_corename(void); + /* Get the parallelization type which is used by OpenBLAS */ int openblas_get_parallel(void); /* OpenBLAS is compiled for sequential use */ diff --git a/driver/level3/Makefile b/driver/level3/Makefile index 4c004ee80..d62921e84 100644 --- a/driver/level3/Makefile +++ b/driver/level3/Makefile @@ -1,12 +1,14 @@ TOPDIR = ../.. 
include ../../Makefile.system +USE_GEMM3M = 0 + ifeq ($(ARCH), x86) -USE_GEMM3M = 1 +USE_GEMM3M = 0 endif ifeq ($(ARCH), x86_64) -USE_GEMM3M = 1 +USE_GEMM3M = 0 endif ifeq ($(ARCH), ia64) @@ -168,7 +170,7 @@ XBLASOBJS += \ xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \ xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX) -ifdef USE_GEMM3M +ifeq ($(USE_GEMM3M), 1) CBLASOBJS += \ cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \ @@ -239,7 +241,7 @@ CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX) XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX) -ifdef USE_GEMM3M +ifeq ($(USE_GEMM3M), 1) CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX) CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX) diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index d8da2e398..0fecbf951 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" +#include <string.h> + static char* openblas_config_str="" #ifdef USE64BITINT "USE64BITINT " @@ -51,9 +53,32 @@ static char* openblas_config_str="" #ifdef NO_AFFINITY "NO_AFFINITY " #endif +#ifndef DYNAMIC_ARCH + CHAR_CORENAME +#endif ; +#ifdef DYNAMIC_ARCH +char *gotoblas_corename(); +static char tmp_config_str[256]; +#endif + + char* CNAME() { +#ifndef DYNAMIC_ARCH return openblas_config_str; +#else + strcpy(tmp_config_str, openblas_config_str); + strcat(tmp_config_str, gotoblas_corename()); + return tmp_config_str; +#endif } + +char* openblas_get_corename() { +#ifndef DYNAMIC_ARCH + return CHAR_CORENAME; +#else + return gotoblas_corename(); +#endif +} diff --git a/driver/others/parameter.c b/driver/others/parameter.c index 12787403e..a0a8b5188 100644 --- a/driver/others/parameter.c +++ b/driver/others/parameter.c @@ -165,7 +165,8 @@ int get_L2_size(void){ #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ - defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) + defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ + defined(PILEDRIVER) || defined(HASWELL) cpuid(0x80000006, &eax, &ebx, &ecx, &edx); diff --git a/exports/gensymbol b/exports/gensymbol index 6c21de455..0769ae0f3 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -73,7 +73,7 @@ ); @gemm3mobjs = ( - zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m, + ); @@ -85,6 +85,7 @@ @misc_no_underscore_objs = ( goto_set_num_threads, openblas_get_config, + openblas_get_corename, ); @misc_underscore_objs = ( @@ -952,6 +952,15 @@ int main(int argc, char *argv[]){ #else get_cpuconfig(); #endif + +#ifdef FORCE + printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); +#else +#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || 
defined(__arm__) + printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); +#endif +#endif + break; case '2' : /* SMP */ diff --git a/interface/Makefile b/interface/Makefile index 465d722b0..a24702630 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -1,6 +1,8 @@ TOPDIR = .. include $(TOPDIR)/Makefile.system +SUPPORT_GEMM3M = 0 + ifeq ($(ARCH), x86) SUPPORT_GEMM3M = 0 endif @@ -124,7 +126,7 @@ ZBLAS3OBJS = \ zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) -ifdef SUPPORT_GEMM3M +ifeq ($(SUPPORT_GEMM3M), 1) CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) @@ -182,7 +184,7 @@ XBLAS3OBJS = \ xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) -ifdef SUPPORT_GEMM3M +ifeq ($(SUPPORT_GEMM3M), 1) XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) @@ -238,7 +240,7 @@ XBLAS3OBJS = \ xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) -ifdef SUPPORT_GEMM3M +ifeq ($(SUPPORT_GEMM3M), 1) XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) diff --git a/interface/ger.c b/interface/ger.c index 7c9cd425f..9857d2423 100644 --- a/interface/ger.c +++ b/interface/ger.c @@ -42,6 +42,12 @@ #include "functable.h" #endif +#ifdef SMP +#ifdef __64BIT__ +#define SMPTEST 1 +#endif +#endif + #ifdef XDOUBLE #define ERROR_NAME "QGER " #elif defined DOUBLE @@ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, blasint incy = *INCY; blasint lda = *LDA; FLOAT *buffer; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order, FLOAT *buffer; blasint info, t; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order, buffer = (FLOAT *)blas_memory_alloc(1); -#ifdef SMPBUG +#ifdef SMPTEST nthreads = num_cpu_avail(2); @@ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order, GER(m, n, 0, alpha, x, incx, 
y, incy, a, lda, buffer); -#ifdef SMPBUG +#ifdef SMPTEST } else { GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); diff --git a/interface/sbmv.c b/interface/sbmv.c index 0dac736cb..761a9a0d0 100644 --- a/interface/sbmv.c +++ b/interface/sbmv.c @@ -43,6 +43,14 @@ #include "functable.h" #endif +/* +#ifdef SMP +#ifdef __64BIT__ +#define SMPTEST 1 +#endif +#endif +*/ + #ifdef XDOUBLE #define ERROR_NAME "QSBMV " #elif defined(DOUBLE) @@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA #endif }; -#ifdef SMPBUG +#ifdef SMPTEST static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { #ifdef XDOUBLE qsbmv_thread_U, qsbmv_thread_L, @@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * blasint info; int uplo; FLOAT *buffer; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -130,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order, FLOAT *buffer; int uplo; blasint info; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -189,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order, buffer = (FLOAT *)blas_memory_alloc(1); -#ifdef SMPBUG +#ifdef SMPTEST nthreads = num_cpu_avail(2); if (nthreads == 1) { @@ -197,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order, (sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer); -#ifdef SMPBUG +#ifdef SMPTEST } else { (sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads); diff --git a/interface/zger.c b/interface/zger.c index cefc839c2..f46a462e2 100644 --- a/interface/zger.c +++ b/interface/zger.c @@ -42,6 +42,13 @@ #include "functable.h" #endif +#ifdef SMP +#ifdef __64BIT__ +#define SMPTEST 1 +#endif +#endif + + #ifdef XDOUBLE #ifndef CONJ #define ERROR_NAME "XGERU " @@ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, blasint incy = *INCY; blasint lda = *LDA; FLOAT *buffer; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ 
-144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order, FLOAT *buffer; blasint info, t; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order, buffer = (FLOAT *)blas_memory_alloc(1); -#ifdef SMPBUG +#ifdef SMPTEST nthreads = num_cpu_avail(2); if (nthreads == 1) { @@ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order, } #endif -#ifdef SMPBUG +#ifdef SMPTEST } else { diff --git a/interface/zsbmv.c b/interface/zsbmv.c index 2efe85ba9..b71d4c519 100644 --- a/interface/zsbmv.c +++ b/interface/zsbmv.c @@ -43,6 +43,14 @@ #include "functable.h" #endif +/* +#ifdef SMP +#ifdef __64BIT__ +#define SMPTEST 1 +#endif +#endif +*/ + #ifdef XDOUBLE #define ERROR_NAME "XSBMV " #elif defined(DOUBLE) @@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT #endif }; -#ifdef SMPBUG +#ifdef SMPTEST static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { #ifdef XDOUBLE xsbmv_thread_U, xsbmv_thread_L, @@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * blasint info; int uplo; FLOAT *buffer; -#ifdef SMPBUG +#ifdef SMPTEST int nthreads; #endif @@ -131,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * buffer = (FLOAT *)blas_memory_alloc(1); -#ifdef SMPBUG +#ifdef SMPTEST nthreads = num_cpu_avail(2); if (nthreads == 1) { @@ -139,7 +147,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * (sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); -#ifdef SMPBUG +#ifdef SMPTEST } else { (sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index be78dfc3d..268177c0f 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -1,3 +1,5 @@ +USE_GEMM3M = 0 + ifeq ($(ARCH), x86) USE_GEMM3M = 1 endif @@ -122,7 +124,7 @@ XBLASOBJS 
+= \ xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ -ifdef USE_GEMM3M +ifeq ($(USE_GEMM3M), 1) CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX) ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX) @@ -256,7 +258,7 @@ XBLASOBJS += \ xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX) -ifdef USE_GEMM3M +ifeq ($(USE_GEMM3M), 1) CBLASOBJS += \ cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ diff --git a/kernel/generic/dot.c b/kernel/generic/dot.c new file mode 100644 index 000000000..bc07bc78f --- /dev/null +++ b/kernel/generic/dot.c @@ -0,0 +1,104 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + + +#include "common.h" + +#if defined(DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif +{ + BLASLONG i=0; + BLASLONG ix=0,iy=0; + +#if defined(DSDOT) + double dot = 0.0 ; +#else + FLOAT dot = 0.0 ; +#endif + + if ( n < 0 ) return(dot); + + if ( (inc_x == 1) && (inc_y == 1) ) + { + + int n1 = n & -4; + + while(i < n1) + { + +#if defined(DSDOT) + dot += (double) y[i] * (double) x[i] + + (double) y[i+1] * (double) x[i+1] + + (double) y[i+2] * (double) x[i+2] + + (double) y[i+3] * (double) x[i+3] ; +#else + dot += y[i] * x[i] + + y[i+1] * x[i+1] + + y[i+2] * x[i+2] + + y[i+3] * x[i+3] ; +#endif + i+=4 ; + + } + + while(i < n) + { + +#if defined(DSDOT) + dot += (double) y[i] * (double) x[i] ; +#else + dot += y[i] * x[i] ; +#endif + i++ ; + + } + return(dot); + + + } + + while(i < n) + { + +#if defined(DSDOT) + dot += (double) y[iy] * (double) x[ix] ; +#else + dot += y[iy] * x[ix] ; +#endif + ix += inc_x ; + iy += inc_y ; + i++ ; + + } + return(dot); + +} + + diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 806c1928c..5086420c1 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -714,13 +714,13 @@ static void init_parameter(void) { fprintf(stderr, "Core2\n"); #endif - TABLE_NAME.sgemm_p = 92 * (l2 >> 9); - TABLE_NAME.dgemm_p 
= 46 * (l2 >> 9); - TABLE_NAME.cgemm_p = 46 * (l2 >> 9); - TABLE_NAME.zgemm_p = 23 * (l2 >> 9); + TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; + TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; + TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; + TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; #ifdef EXPRECISION - TABLE_NAME.qgemm_p = 92 * (l2 >> 9); - TABLE_NAME.xgemm_p = 46 * (l2 >> 9); + TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; + TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; #endif #endif @@ -740,6 +740,23 @@ static void init_parameter(void) { #endif #endif +#ifdef DUNNINGTON + +#ifdef DEBUG + fprintf(stderr, "Dunnington\n"); +#endif + + TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; + TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; + TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; + TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; +#ifdef EXPRECISION + TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; + TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; +#endif +#endif + + #ifdef NEHALEM #ifdef DEBUG diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index fa6282c53..ec21826d7 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S endif ifndef SDOTKERNEL -SDOTKERNEL = dot_sse.S +SDOTKERNEL = ../generic/dot.c endif - ifndef DSDOTKERNEL -DSDOTKERNEL = ../arm/dot.c +DSDOTKERNEL = ../generic/dot.c endif - ifndef DDOTKERNEL DDOTKERNEL = dot_sse2.S endif diff --git a/kernel/x86_64/KERNEL.BULLDOZER b/kernel/x86_64/KERNEL.BULLDOZER index d9b9f84f3..6e09813c3 100644 --- a/kernel/x86_64/KERNEL.BULLDOZER +++ b/kernel/x86_64/KERNEL.BULLDOZER @@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S DGEMVNKERNEL = dgemv_n_bulldozer.S DGEMVTKERNEL = dgemv_t_bulldozer.S -DAXPYKERNEL = daxpy_bulldozer.S DDOTKERNEL = ddot_bulldozer.S DCOPYKERNEL = dcopy_bulldozer.S diff --git a/kernel/x86_64/KERNEL.PILEDRIVER b/kernel/x86_64/KERNEL.PILEDRIVER index b083b5d53..e4ac0d895 100644 --- a/kernel/x86_64/KERNEL.PILEDRIVER +++ b/kernel/x86_64/KERNEL.PILEDRIVER @@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S DGEMVNKERNEL = 
dgemv_n_bulldozer.S DGEMVTKERNEL = dgemv_t_bulldozer.S -DAXPYKERNEL = daxpy_bulldozer.S DDOTKERNEL = ddot_bulldozer.S DCOPYKERNEL = dcopy_bulldozer.S diff --git a/kernel/x86_64/KERNEL.PRESCOTT b/kernel/x86_64/KERNEL.PRESCOTT index 9b3e514d1..0ea43ad7b 100644 --- a/kernel/x86_64/KERNEL.PRESCOTT +++ b/kernel/x86_64/KERNEL.PRESCOTT @@ -19,7 +19,7 @@ DGEMMINCOPYOBJ = DGEMMITCOPYOBJ = DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) -CGEMMKERNEL = zgemm_kernel_4x2_sse3.S +CGEMMKERNEL = zgemm_kernel_4x2_sse.S CGEMMINCOPY = ../generic/zgemm_ncopy_4.c CGEMMITCOPY = ../generic/zgemm_tcopy_4.c CGEMMONCOPY = zgemm_ncopy_2.S @@ -1,7 +1,6 @@ SHELL = /bin/sh PLAT = _LINUX DRVOPTS = $(OPTS) -LOADER = $(FORTRAN) ARCHFLAGS= -ru #RANLIB = ranlib diff --git a/utest/Makefile b/utest/Makefile index 31cb93176..fa05458cc 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -1,15 +1,19 @@ UTEST_CHECK = 1 TOPDIR = .. -include $(TOPDIR)/Makefile.system TARGET=openblas_utest +.PHONY : all +.NOTPARALLEL : all run_test $(TARGET) + CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2 CUNIT_DIR=$(CURDIR)/CUnit-2.1-2 CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a -CFLAGS+=-I$(CUNIT_DIR)/include +CFLAGS +=-I$(CUNIT_DIR)/include + +include $(TOPDIR)/Makefile.system OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o |