summary refs log tree commit diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile.L3 6
-rw-r--r-- kernel/generic/dot.c 104
-rw-r--r-- kernel/setparam-ref.c 29
-rw-r--r-- kernel/x86_64/KERNEL 6
-rw-r--r-- kernel/x86_64/KERNEL.BULLDOZER 1
-rw-r--r-- kernel/x86_64/KERNEL.PILEDRIVER 1
-rw-r--r-- kernel/x86_64/KERNEL.PRESCOTT 2
7 files changed, 134 insertions, 15 deletions
diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index be78dfc3d..268177c0f 100644
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3
@@ -1,3 +1,5 @@
+USE_GEMM3M = 0
+
ifeq ($(ARCH), x86)
USE_GEMM3M = 1
endif
@@ -122,7 +124,7 @@ XBLASOBJS += \
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
-ifdef USE_GEMM3M
+ifeq ($(USE_GEMM3M), 1)
CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX)
ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX)
@@ -256,7 +258,7 @@ XBLASOBJS += \
xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX)
-ifdef USE_GEMM3M
+ifeq ($(USE_GEMM3M), 1)
CBLASOBJS += \
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
diff --git a/kernel/generic/dot.c b/kernel/generic/dot.c
new file mode 100644
index 000000000..bc07bc78f
--- /dev/null
+++ b/kernel/generic/dot.c
@@ -0,0 +1,104 @@
+/***************************************************************************
+Copyright (c) 2014, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+
+#include "common.h"
+/* Generic C dot-product kernel: returns the sum of x[i*inc_x] * y[i*inc_y] for i in [0, n). */
+#if defined(DSDOT)
+double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) /* DSDOT build: FLOAT inputs, double result */
+#else
+FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+#endif
+{
+ BLASLONG i=0; /* number of element pairs consumed so far */
+ BLASLONG ix=0,iy=0; /* running offsets into x and y, used only on the strided path */
+/* The accumulator has the same type as the function's return value. */
+#if defined(DSDOT)
+ double dot = 0.0 ;
+#else
+ FLOAT dot = 0.0 ;
+#endif
+/* A negative count yields 0; n == 0 also yields 0 because none of the loops below execute. */
+ if ( n < 0 ) return(dot);
+/* Contiguous case: both vectors have unit stride, so one index i addresses both. */
+ if ( (inc_x == 1) && (inc_y == 1) )
+ {
+/* n1 is n rounded down to a multiple of 4; it has the same type as n so the loop bound is never narrowed. */
+ BLASLONG n1 = n & -4;
+
+ while(i < n1) /* main loop: four products per iteration */
+ {
+
+#if defined(DSDOT)
+ dot += (double) y[i] * (double) x[i] /* each operand is widened to double before multiplying */
+ + (double) y[i+1] * (double) x[i+1]
+ + (double) y[i+2] * (double) x[i+2]
+ + (double) y[i+3] * (double) x[i+3] ;
+#else
+ dot += y[i] * x[i]
+ + y[i+1] * x[i+1]
+ + y[i+2] * x[i+2]
+ + y[i+3] * x[i+3] ;
+#endif
+ i+=4 ;
+
+ }
+/* Tail loop: the 0 to 3 elements left over after the unrolled loop. */
+ while(i < n)
+ {
+
+#if defined(DSDOT)
+ dot += (double) y[i] * (double) x[i] ;
+#else
+ dot += y[i] * x[i] ;
+#endif
+ i++ ;
+
+ }
+ return(dot);
+
+
+ }
+/* General case: step through x and y with their own increments, starting at offset 0 in each. */
+ while(i < n)
+ {
+
+#if defined(DSDOT)
+ dot += (double) y[iy] * (double) x[ix] ;
+#else
+ dot += y[iy] * x[ix] ;
+#endif
+ ix += inc_x ;
+ iy += inc_y ;
+ i++ ;
+
+ }
+ return(dot);
+
+}
+
+
diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index 806c1928c..5086420c1 100644
--- a/kernel/setparam-ref.c
+++ b/kernel/setparam-ref.c
@@ -714,13 +714,13 @@ static void init_parameter(void) {
fprintf(stderr, "Core2\n");
#endif
- TABLE_NAME.sgemm_p = 92 * (l2 >> 9);
- TABLE_NAME.dgemm_p = 46 * (l2 >> 9);
- TABLE_NAME.cgemm_p = 46 * (l2 >> 9);
- TABLE_NAME.zgemm_p = 23 * (l2 >> 9);
+ TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
+ TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
+ TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
+ TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
#ifdef EXPRECISION
- TABLE_NAME.qgemm_p = 92 * (l2 >> 9);
- TABLE_NAME.xgemm_p = 46 * (l2 >> 9);
+ TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
+ TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
#endif
#endif
@@ -740,6 +740,23 @@ static void init_parameter(void) {
#endif
#endif
+#ifdef DUNNINGTON
+
+#ifdef DEBUG
+ fprintf(stderr, "Dunnington\n");
+#endif
+
+ TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
+ TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
+ TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
+ TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
+#ifdef EXPRECISION
+ TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
+ TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
+#endif
+#endif
+
+
#ifdef NEHALEM
#ifdef DEBUG
diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL
index fa6282c53..ec21826d7 100644
--- a/kernel/x86_64/KERNEL
+++ b/kernel/x86_64/KERNEL
@@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S
endif
ifndef SDOTKERNEL
-SDOTKERNEL = dot_sse.S
+SDOTKERNEL = ../generic/dot.c
endif
-
ifndef DSDOTKERNEL
-DSDOTKERNEL = ../arm/dot.c
+DSDOTKERNEL = ../generic/dot.c
endif
-
ifndef DDOTKERNEL
DDOTKERNEL = dot_sse2.S
endif
diff --git a/kernel/x86_64/KERNEL.BULLDOZER b/kernel/x86_64/KERNEL.BULLDOZER
index d9b9f84f3..6e09813c3 100644
--- a/kernel/x86_64/KERNEL.BULLDOZER
+++ b/kernel/x86_64/KERNEL.BULLDOZER
@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S
DGEMVNKERNEL = dgemv_n_bulldozer.S
DGEMVTKERNEL = dgemv_t_bulldozer.S
-DAXPYKERNEL = daxpy_bulldozer.S
DDOTKERNEL = ddot_bulldozer.S
DCOPYKERNEL = dcopy_bulldozer.S
diff --git a/kernel/x86_64/KERNEL.PILEDRIVER b/kernel/x86_64/KERNEL.PILEDRIVER
index b083b5d53..e4ac0d895 100644
--- a/kernel/x86_64/KERNEL.PILEDRIVER
+++ b/kernel/x86_64/KERNEL.PILEDRIVER
@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S
DGEMVNKERNEL = dgemv_n_bulldozer.S
DGEMVTKERNEL = dgemv_t_bulldozer.S
-DAXPYKERNEL = daxpy_bulldozer.S
DDOTKERNEL = ddot_bulldozer.S
DCOPYKERNEL = dcopy_bulldozer.S
diff --git a/kernel/x86_64/KERNEL.PRESCOTT b/kernel/x86_64/KERNEL.PRESCOTT
index 9b3e514d1..0ea43ad7b 100644
--- a/kernel/x86_64/KERNEL.PRESCOTT
+++ b/kernel/x86_64/KERNEL.PRESCOTT
@@ -19,7 +19,7 @@ DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
-CGEMMKERNEL = zgemm_kernel_4x2_sse3.S
+CGEMMKERNEL = zgemm_kernel_4x2_sse.S
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPY = zgemm_ncopy_2.S