summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zhang Xianyi <traits.zhang@gmail.com> 2014-07-09 08:48:00 +0800
committer Zhang Xianyi <traits.zhang@gmail.com> 2014-07-09 08:48:00 +0800
commit f9991fd5f627569827f73c5be46addfaf8ced6a5 (patch)
tree 6e586d1588cbddbc1465036aa72ef89ba0f480d5
parent da3d70420ae5a1795fda107d89efdaed5834ff94 (diff)
parent 47688e24e99079191488b2fc57248f92753e0611 (diff)
download openblas-0.2.10.rc2.tar.gz
openblas-0.2.10.rc2.tar.bz2
openblas-0.2.10.rc2.zip
Merge branch 'develop' v0.2.10.rc2
-rw-r--r-- Makefile 8
-rw-r--r-- Makefile.rule 2
-rw-r--r-- cblas.h 3
-rw-r--r-- driver/level3/Makefile 10
-rw-r--r-- driver/others/openblas_get_config.c 25
-rw-r--r-- driver/others/parameter.c 3
-rw-r--r-- exports/gensymbol 3
-rw-r--r-- getarch.c 9
-rw-r--r-- interface/Makefile 8
-rw-r--r-- interface/ger.c 14
-rw-r--r-- interface/sbmv.c 18
-rw-r--r-- interface/zger.c 15
-rw-r--r-- interface/zsbmv.c 16
-rw-r--r-- kernel/Makefile.L3 6
-rw-r--r-- kernel/generic/dot.c 104
-rw-r--r-- kernel/setparam-ref.c 29
-rw-r--r-- kernel/x86_64/KERNEL 6
-rw-r--r-- kernel/x86_64/KERNEL.BULLDOZER 1
-rw-r--r-- kernel/x86_64/KERNEL.PILEDRIVER 1
-rw-r--r-- kernel/x86_64/KERNEL.PRESCOTT 2
-rw-r--r-- make.inc 1
-rw-r--r-- utest/Makefile 8
22 files changed, 246 insertions, 46 deletions
diff --git a/Makefile b/Makefile
index 397836da1..2e378883b 100644
--- a/Makefile
+++ b/Makefile
@@ -247,10 +247,16 @@ ifndef NOFORTRAN
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(F_COMPILER), GFORTRAN)
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
+ifdef SMP # SMP set: add -pthread to the loader command
+ -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
+else
+ -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
+endif
else
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
+ -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif
diff --git a/Makefile.rule b/Makefile.rule
index 90ff80ef8..f04591907 100644
--- a/Makefile.rule
+++ b/Makefile.rule
@@ -3,7 +3,7 @@
#
# This library's version
-VERSION = 0.2.10.rc1
+VERSION = 0.2.10.rc2
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
diff --git a/cblas.h b/cblas.h
index 841ad6330..ef072e6ff 100644
--- a/cblas.h
+++ b/cblas.h
@@ -16,6 +16,9 @@ void goto_set_num_threads(int num_threads);
/*Get the build configure on runtime.*/
char* openblas_get_config(void);
+/* Get the CPU core name at runtime. */
+char* openblas_get_corename(void);
+
/* Get the parallelization type which is used by OpenBLAS */
int openblas_get_parallel(void);
/* OpenBLAS is compiled for sequential use */
diff --git a/driver/level3/Makefile b/driver/level3/Makefile
index 4c004ee80..d62921e84 100644
--- a/driver/level3/Makefile
+++ b/driver/level3/Makefile
@@ -1,12 +1,14 @@
TOPDIR = ../..
include ../../Makefile.system
+USE_GEMM3M = 0
+
ifeq ($(ARCH), x86)
-USE_GEMM3M = 1
+USE_GEMM3M = 0
endif
ifeq ($(ARCH), x86_64)
-USE_GEMM3M = 1
+USE_GEMM3M = 0
endif
ifeq ($(ARCH), ia64)
@@ -168,7 +170,7 @@ XBLASOBJS += \
xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \
xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX)
-ifdef USE_GEMM3M
+ifeq ($(USE_GEMM3M), 1)
CBLASOBJS += \
cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \
@@ -239,7 +241,7 @@ CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread
ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX)
XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX)
-ifdef USE_GEMM3M
+ifeq ($(USE_GEMM3M), 1)
CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX)
CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX)
diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c
index d8da2e398..0fecbf951 100644
--- a/driver/others/openblas_get_config.c
+++ b/driver/others/openblas_get_config.c
@@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
+#include <string.h>
+
static char* openblas_config_str=""
#ifdef USE64BITINT
"USE64BITINT "
@@ -51,9 +53,32 @@ static char* openblas_config_str=""
#ifdef NO_AFFINITY
"NO_AFFINITY "
#endif
+#ifndef DYNAMIC_ARCH
+ CHAR_CORENAME
+#endif
;
+#ifdef DYNAMIC_ARCH
+char *gotoblas_corename();
+static char tmp_config_str[256];
+#endif
+/* Returns the build-configuration string. With DYNAMIC_ARCH the name from gotoblas_corename()
+ * is appended in the static buffer tmp_config_str, which is rewritten on every call. */
char* CNAME() {
+#ifndef DYNAMIC_ARCH
return openblas_config_str;
+#else
+ strcpy(tmp_config_str, openblas_config_str);
+ strcat(tmp_config_str, gotoblas_corename()); /* no bounds check against the 256-byte buffer */
+ return tmp_config_str;
+#endif
}
+/* Returns the core name: the CHAR_CORENAME literal, or gotoblas_corename() when DYNAMIC_ARCH is defined. */
+char* openblas_get_corename(void) {
+#ifndef DYNAMIC_ARCH
+ return CHAR_CORENAME;
+#else
+ return gotoblas_corename();
+#endif
+}
diff --git a/driver/others/parameter.c b/driver/others/parameter.c
index 12787403e..a0a8b5188 100644
--- a/driver/others/parameter.c
+++ b/driver/others/parameter.c
@@ -165,7 +165,8 @@ int get_L2_size(void){
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
- defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC)
+ defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
+ defined(PILEDRIVER) || defined(HASWELL)
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
diff --git a/exports/gensymbol b/exports/gensymbol
index 6c21de455..0769ae0f3 100644
--- a/exports/gensymbol
+++ b/exports/gensymbol
@@ -73,7 +73,7 @@
);
@gemm3mobjs = (
- zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m,
+
);
@@ -85,6 +85,7 @@
@misc_no_underscore_objs = (
goto_set_num_threads,
openblas_get_config,
+ openblas_get_corename,
);
@misc_underscore_objs = (
diff --git a/getarch.c b/getarch.c
index 234f7e172..3e9914259 100644
--- a/getarch.c
+++ b/getarch.c
@@ -952,6 +952,15 @@ int main(int argc, char *argv[]){
#else
get_cpuconfig();
#endif
+
+#ifdef FORCE
+ printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
+#else
+#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
+ printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
+#endif
+#endif
+
break;
case '2' : /* SMP */
diff --git a/interface/Makefile b/interface/Makefile
index 465d722b0..a24702630 100644
--- a/interface/Makefile
+++ b/interface/Makefile
@@ -1,6 +1,8 @@
TOPDIR = ..
include $(TOPDIR)/Makefile.system
+SUPPORT_GEMM3M = 0
+
ifeq ($(ARCH), x86)
SUPPORT_GEMM3M = 0
endif
@@ -124,7 +126,7 @@ ZBLAS3OBJS = \
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)
-ifdef SUPPORT_GEMM3M
+ifeq ($(SUPPORT_GEMM3M), 1)
CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX)
@@ -182,7 +184,7 @@ XBLAS3OBJS = \
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)
-ifdef SUPPORT_GEMM3M
+ifeq ($(SUPPORT_GEMM3M), 1)
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
@@ -238,7 +240,7 @@ XBLAS3OBJS = \
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)
-ifdef SUPPORT_GEMM3M
+ifeq ($(SUPPORT_GEMM3M), 1)
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
diff --git a/interface/ger.c b/interface/ger.c
index 7c9cd425f..9857d2423 100644
--- a/interface/ger.c
+++ b/interface/ger.c
@@ -42,6 +42,12 @@
#include "functable.h"
#endif
+#ifdef SMP
+#ifdef __64BIT__
+#define SMPTEST 1
+#endif
+#endif
+
#ifdef XDOUBLE
#define ERROR_NAME "QGER "
#elif defined DOUBLE
@@ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
blasint incy = *INCY;
blasint lda = *LDA;
FLOAT *buffer;
-#ifdef SMPBUG
+#ifdef SMPTEST
int nthreads;
#endif
@@ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
blasint info, t;
-#ifdef SMPBUG
+#ifdef SMPTEST
int nthreads;
#endif
@@ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
-#ifdef SMPBUG
+#ifdef SMPTEST
nthreads = num_cpu_avail(2);
@@ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order,
GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);
-#ifdef SMPBUG
+#ifdef SMPTEST
} else {
GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads);
diff --git a/interface/sbmv.c b/interface/sbmv.c
index 0dac736cb..761a9a0d0 100644
--- a/interface/sbmv.c
+++ b/interface/sbmv.c
@@ -43,6 +43,14 @@
#include "functable.h"
#endif
+/*
+#ifdef SMP
+#ifdef __64BIT__
+#define SMPTEST 1
+#endif
+#endif
+*/
+
#ifdef XDOUBLE
#define ERROR_NAME "QSBMV "
#elif defined(DOUBLE)
@@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
#endif
};
-#ifdef SMPBUG
+#ifdef SMPTEST
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
qsbmv_thread_U, qsbmv_thread_L,
@@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
-#ifdef SMPBUG
+#ifdef SMPTEST
int nthreads;
#endif
@@ -130,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
-#ifdef SMPBUG
+#ifdef SMPTEST
int nthreads;
#endif
@@ -189,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
-#ifdef SMPBUG
+#ifdef SMPTEST
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@@ -197,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
-#ifdef SMPBUG
+#ifdef SMPTEST
} else {
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
diff --git a/interface/zger.c b/interface/zger.c
index cefc839c2..f46a462e2 100644
--- a/interface/zger.c
+++ b/interface/zger.c
@@ -42,6 +42,13 @@
#include "functable.h"
#endif
+#ifdef SMP
+#ifdef __64BIT__
+#define SMPTEST 1
+#endif
+#endif
+
+
#ifdef XDOUBLE
#ifndef CONJ
#define ERROR_NAME "XGERU "
@@ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
blasint incy = *INCY;
blasint lda = *LDA;
FLOAT *buffer;
-#ifdef SMPBUG
+#ifdef SMPTEST
int nthreads;
#endif
@@ -144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
blasint info, t;
-#ifdef SMPBUG
+#ifdef SMPTEST
int nthreads;
#endif
@@ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
-#ifdef SMPBUG
+#ifdef SMPTEST
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@@ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order,
}
#endif
-#ifdef SMPBUG
+#ifdef SMPTEST
} else {
diff --git a/interface/zsbmv.c b/interface/zsbmv.c
index 2efe85ba9..b71d4c519 100644
--- a/interface/zsbmv.c
+++ b/interface/zsbmv.c
@@ -43,6 +43,14 @@
#include "functable.h"
#endif
+/*
+#ifdef SMP
+#ifdef __64BIT__
+#define SMPTEST 1
+#endif
+#endif
+*/
+
#ifdef XDOUBLE
#define ERROR_NAME "XSBMV "
#elif defined(DOUBLE)
@@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
#endif
};
-#ifdef SMPBUG
+#ifdef SMPTEST
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xsbmv_thread_U, xsbmv_thread_L,
@@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
-#ifdef SMPBUG
+#ifdef SMPTEST
int nthreads;
#endif
@@ -131,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
buffer = (FLOAT *)blas_memory_alloc(1);
-#ifdef SMPBUG
+#ifdef SMPTEST
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@@ -139,7 +147,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
-#ifdef SMPBUG
+#ifdef SMPTEST
} else {
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);
diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index be78dfc3d..268177c0f 100644
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3
@@ -1,3 +1,5 @@
+USE_GEMM3M = 0
+
ifeq ($(ARCH), x86)
USE_GEMM3M = 1
endif
@@ -122,7 +124,7 @@ XBLASOBJS += \
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
-ifdef USE_GEMM3M
+ifeq ($(USE_GEMM3M), 1)
CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX)
ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX)
@@ -256,7 +258,7 @@ XBLASOBJS += \
xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX)
-ifdef USE_GEMM3M
+ifeq ($(USE_GEMM3M), 1)
CBLASOBJS += \
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
diff --git a/kernel/generic/dot.c b/kernel/generic/dot.c
new file mode 100644
index 000000000..bc07bc78f
--- /dev/null
+++ b/kernel/generic/dot.c
@@ -0,0 +1,104 @@
+/***************************************************************************
+Copyright (c) 2014, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+
+#include "common.h"
+/* Dot product of x and y over n elements with strides inc_x and inc_y; DSDOT builds accumulate and return double. */
+#if defined(DSDOT)
+double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+#else
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+#endif
+{
+	BLASLONG i=0;
+	BLASLONG ix=0,iy=0;
+	/* Accumulator: double under DSDOT, FLOAT otherwise. */
+#if defined(DSDOT)
+	double dot = 0.0 ;
+#else
+	FLOAT dot = 0.0 ;
+#endif
+	/* A negative n returns 0; n == 0 also yields 0 since no loop below runs. */
+	if ( n < 0 ) return(dot);
+	/* Unit strides: index both vectors directly with i. */
+	if ( (inc_x == 1) && (inc_y == 1) )
+	{
+		/* n rounded down to a multiple of 4; kept in BLASLONG so a large n is not narrowed to int. */
+		BLASLONG n1 = n & -4;
+		/* Main loop: four products per pass. */
+		while(i < n1)
+		{
+
+#if defined(DSDOT)
+			dot += (double) y[i] * (double) x[i]
+			     + (double) y[i+1] * (double) x[i+1]
+			     + (double) y[i+2] * (double) x[i+2]
+			     + (double) y[i+3] * (double) x[i+3] ;
+#else
+			dot += y[i] * x[i]
+			     + y[i+1] * x[i+1]
+			     + y[i+2] * x[i+2]
+			     + y[i+3] * x[i+3] ;
+#endif
+			i+=4 ;
+
+		}
+		/* Tail: the remaining n - n1 elements (at most 3). */
+		while(i < n)
+		{
+
+#if defined(DSDOT)
+			dot += (double) y[i] * (double) x[i] ;
+#else
+			dot += y[i] * x[i] ;
+#endif
+			i++ ;
+
+		}
+		return(dot);
+
+
+	}
+	/* General strides: ix and iy advance by inc_x and inc_y from index 0. */
+	while(i < n)
+	{
+
+#if defined(DSDOT)
+		dot += (double) y[iy] * (double) x[ix] ;
+#else
+		dot += y[iy] * x[ix] ;
+#endif
+		ix += inc_x ;
+		iy += inc_y ;
+		i++ ;
+
+	}
+	return(dot);
+
+}
+
+
diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index 806c1928c..5086420c1 100644
--- a/kernel/setparam-ref.c
+++ b/kernel/setparam-ref.c
@@ -714,13 +714,13 @@ static void init_parameter(void) {
fprintf(stderr, "Core2\n");
#endif
- TABLE_NAME.sgemm_p = 92 * (l2 >> 9);
- TABLE_NAME.dgemm_p = 46 * (l2 >> 9);
- TABLE_NAME.cgemm_p = 46 * (l2 >> 9);
- TABLE_NAME.zgemm_p = 23 * (l2 >> 9);
+ TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
+ TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
+ TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
+ TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
#ifdef EXPRECISION
- TABLE_NAME.qgemm_p = 92 * (l2 >> 9);
- TABLE_NAME.xgemm_p = 46 * (l2 >> 9);
+ TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
+ TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
#endif
#endif
@@ -740,6 +740,23 @@ static void init_parameter(void) {
#endif
#endif
+#ifdef DUNNINGTON
+
+#ifdef DEBUG
+ fprintf(stderr, "Dunnington\n");
+#endif
+
+ TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
+ TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
+ TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
+ TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
+#ifdef EXPRECISION
+ TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
+ TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
+#endif
+#endif
+
+
#ifdef NEHALEM
#ifdef DEBUG
diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL
index fa6282c53..ec21826d7 100644
--- a/kernel/x86_64/KERNEL
+++ b/kernel/x86_64/KERNEL
@@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S
endif
ifndef SDOTKERNEL
-SDOTKERNEL = dot_sse.S
+SDOTKERNEL = ../generic/dot.c
endif
-
ifndef DSDOTKERNEL
-DSDOTKERNEL = ../arm/dot.c
+DSDOTKERNEL = ../generic/dot.c
endif
-
ifndef DDOTKERNEL
DDOTKERNEL = dot_sse2.S
endif
diff --git a/kernel/x86_64/KERNEL.BULLDOZER b/kernel/x86_64/KERNEL.BULLDOZER
index d9b9f84f3..6e09813c3 100644
--- a/kernel/x86_64/KERNEL.BULLDOZER
+++ b/kernel/x86_64/KERNEL.BULLDOZER
@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S
DGEMVNKERNEL = dgemv_n_bulldozer.S
DGEMVTKERNEL = dgemv_t_bulldozer.S
-DAXPYKERNEL = daxpy_bulldozer.S
DDOTKERNEL = ddot_bulldozer.S
DCOPYKERNEL = dcopy_bulldozer.S
diff --git a/kernel/x86_64/KERNEL.PILEDRIVER b/kernel/x86_64/KERNEL.PILEDRIVER
index b083b5d53..e4ac0d895 100644
--- a/kernel/x86_64/KERNEL.PILEDRIVER
+++ b/kernel/x86_64/KERNEL.PILEDRIVER
@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S
DGEMVNKERNEL = dgemv_n_bulldozer.S
DGEMVTKERNEL = dgemv_t_bulldozer.S
-DAXPYKERNEL = daxpy_bulldozer.S
DDOTKERNEL = ddot_bulldozer.S
DCOPYKERNEL = dcopy_bulldozer.S
diff --git a/kernel/x86_64/KERNEL.PRESCOTT b/kernel/x86_64/KERNEL.PRESCOTT
index 9b3e514d1..0ea43ad7b 100644
--- a/kernel/x86_64/KERNEL.PRESCOTT
+++ b/kernel/x86_64/KERNEL.PRESCOTT
@@ -19,7 +19,7 @@ DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
-CGEMMKERNEL = zgemm_kernel_4x2_sse3.S
+CGEMMKERNEL = zgemm_kernel_4x2_sse.S
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPY = zgemm_ncopy_2.S
diff --git a/make.inc b/make.inc
index da430b7d6..485cb7d48 100644
--- a/make.inc
+++ b/make.inc
@@ -1,7 +1,6 @@
SHELL = /bin/sh
PLAT = _LINUX
DRVOPTS = $(OPTS)
-LOADER = $(FORTRAN)
ARCHFLAGS= -ru
#RANLIB = ranlib
diff --git a/utest/Makefile b/utest/Makefile
index 31cb93176..fa05458cc 100644
--- a/utest/Makefile
+++ b/utest/Makefile
@@ -1,15 +1,19 @@
UTEST_CHECK = 1
TOPDIR = ..
-include $(TOPDIR)/Makefile.system
TARGET=openblas_utest
+.PHONY : all
+.NOTPARALLEL : all run_test $(TARGET)
+
CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2
CUNIT_DIR=$(CURDIR)/CUnit-2.1-2
CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a
-CFLAGS+=-I$(CUNIT_DIR)/include
+CFLAGS +=-I$(CUNIT_DIR)/include
+
+include $(TOPDIR)/Makefile.system
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o