summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denis Steckelmacher <steckdenis@yahoo.fr> 2017-03-19 15:32:50 +0100
committer Denis Steckelmacher <steckdenis@yahoo.fr> 2017-03-19 15:32:50 +0100
commit c9ff735da6deafa25a0a10f25b9fcf77195fb055 (patch)
tree 4d23f47c1bff360424e07692f3bc7a932f944b71
parent 99880f79068fc12b3025840671a838f0d4be3c9e (diff)
downloadopenblas-c9ff735da6deafa25a0a10f25b9fcf77195fb055.tar.gz
openblas-c9ff735da6deafa25a0a10f25b9fcf77195fb055.tar.bz2
openblas-c9ff735da6deafa25a0a10f25b9fcf77195fb055.zip
Add ZEN support (tested for auto-detected static backend)
-rw-r--r--Makefile.system8
-rw-r--r--TargetList.txt1
-rw-r--r--cmake/arch.cmake2
-rw-r--r--cmake/system.cmake2
-rw-r--r--cpuid.h2
-rw-r--r--cpuid_x86.c51
-rw-r--r--driver/others/dynamic.c35
-rw-r--r--driver/others/parameter.c4
-rw-r--r--getarch.c19
-rw-r--r--kernel/CMakeLists.txt2
-rw-r--r--kernel/Makefile.L34
-rw-r--r--kernel/setparam-ref.c16
-rw-r--r--kernel/x86/KERNEL.ZEN1
-rw-r--r--kernel/x86_64/KERNEL.ZEN98
-rw-r--r--kernel/x86_64/caxpy.c2
-rw-r--r--kernel/x86_64/cdot.c2
-rw-r--r--kernel/x86_64/cgemv_n_4.c2
-rw-r--r--kernel/x86_64/cgemv_t_4.c2
-rw-r--r--kernel/x86_64/cscal.c2
-rw-r--r--kernel/x86_64/daxpy.c2
-rw-r--r--kernel/x86_64/ddot.c2
-rw-r--r--kernel/x86_64/dgemv_n_4.c2
-rw-r--r--kernel/x86_64/dgemv_t_4.c2
-rw-r--r--kernel/x86_64/dscal.c2
-rw-r--r--kernel/x86_64/dsymv_L.c2
-rw-r--r--kernel/x86_64/dsymv_U.c2
-rw-r--r--kernel/x86_64/saxpy.c2
-rw-r--r--kernel/x86_64/sdot.c2
-rw-r--r--kernel/x86_64/sgemv_n_4.c2
-rw-r--r--kernel/x86_64/sgemv_t_4.c2
-rw-r--r--kernel/x86_64/ssymv_L.c2
-rw-r--r--kernel/x86_64/ssymv_U.c2
-rw-r--r--kernel/x86_64/symv_L_sse.S2
-rw-r--r--kernel/x86_64/symv_L_sse2.S2
-rw-r--r--kernel/x86_64/symv_U_sse.S2
-rw-r--r--kernel/x86_64/symv_U_sse2.S2
-rw-r--r--kernel/x86_64/zaxpy.c2
-rw-r--r--kernel/x86_64/zdot.c2
-rw-r--r--kernel/x86_64/zgemv_n_4.c2
-rw-r--r--kernel/x86_64/zgemv_t_4.c2
-rw-r--r--kernel/x86_64/zscal.c2
-rw-r--r--kernel/x86_64/zsymv_L_sse.S2
-rw-r--r--kernel/x86_64/zsymv_L_sse2.S2
-rw-r--r--kernel/x86_64/zsymv_U_sse.S2
-rw-r--r--kernel/x86_64/zsymv_U_sse2.S2
-rw-r--r--param.h90
46 files changed, 339 insertions, 58 deletions
diff --git a/Makefile.system b/Makefile.system
index e8cc29686..207c9961a 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -68,6 +68,9 @@ endif
ifeq ($(TARGET), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
+ifeq ($(TARGET), ZEN)
+GETARCH_FLAGS := -DFORCE_BARCELONA
+endif
endif
@@ -98,6 +101,9 @@ endif
ifeq ($(TARGET_CORE), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
+ifeq ($(TARGET_CORE), ZEN)
+GETARCH_FLAGS := -DFORCE_BARCELONA
+endif
endif
@@ -443,7 +449,7 @@ ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
endif
ifneq ($(NO_AVX2), 1)
-DYNAMIC_CORE += HASWELL
+DYNAMIC_CORE += HASWELL ZEN
endif
endif
diff --git a/TargetList.txt b/TargetList.txt
index 9d3355428..743996e94 100644
--- a/TargetList.txt
+++ b/TargetList.txt
@@ -34,6 +34,7 @@ BULLDOZER
PILEDRIVER
STEAMROLLER
EXCAVATOR
+ZEN
c)VIA CPU:
SSE_GENERIC
diff --git a/cmake/arch.cmake b/cmake/arch.cmake
index 0f66a98ca..d32d4fc24 100644
--- a/cmake/arch.cmake
+++ b/cmake/arch.cmake
@@ -73,7 +73,7 @@ if (DYNAMIC_ARCH)
set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
endif ()
if (NOT NO_AVX2)
- set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
+ set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL ZEN")
endif ()
endif ()
diff --git a/cmake/system.cmake b/cmake/system.cmake
index aa046a56a..ee64f85b4 100644
--- a/cmake/system.cmake
+++ b/cmake/system.cmake
@@ -22,7 +22,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
set(TARGET "NEHALEM")
endif ()
- if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER")
+ if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
set(TARGET "BARCELONA")
endif ()
endif ()
diff --git a/cpuid.h b/cpuid.h
index e9bd2d016..1dacc49ba 100644
--- a/cpuid.h
+++ b/cpuid.h
@@ -114,6 +114,7 @@
#define CORE_HASWELL 24
#define CORE_STEAMROLLER 25
#define CORE_EXCAVATOR 26
+#define CORE_ZEN 27
#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
@@ -209,5 +210,6 @@ typedef struct {
#define CPUTYPE_HASWELL 48
#define CPUTYPE_STEAMROLLER 49
#define CPUTYPE_EXCAVATOR 50
+#define CPUTYPE_ZEN 51
#endif
diff --git a/cpuid_x86.c b/cpuid_x86.c
index 814a79d4d..5a21debe0 100644
--- a/cpuid_x86.c
+++ b/cpuid_x86.c
@@ -1281,6 +1281,8 @@ int get_cpuname(void){
case 3:
case 10:
return CPUTYPE_BARCELONA;
+ case 5:
+ return CPUTYPE_BOBCAT;
case 6:
switch (model) {
case 1:
@@ -1295,8 +1297,8 @@ int get_cpuname(void){
return CPUTYPE_PILEDRIVER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
- case 5: // New EXCAVATOR CPUS
- if(support_avx())
+ case 5: // New EXCAVATOR CPUS
+ if(support_avx())
return CPUTYPE_EXCAVATOR;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
@@ -1322,8 +1324,19 @@ int get_cpuname(void){
break;
}
break;
- case 5:
- return CPUTYPE_BOBCAT;
+ case 8:
+ switch (model) {
+ case 1:
+ // AMD Ryzen
+ if(support_avx())
+#ifndef NO_AVX2
+ return CPUTYPE_ZEN;
+#else
+ return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
+#endif
+ else
+ return CPUTYPE_BARCELONA;
+ }
}
break;
}
@@ -1450,6 +1463,7 @@ static char *cpuname[] = {
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
+ "ZEN",
};
static char *lowercpuname[] = {
@@ -1503,6 +1517,7 @@ static char *lowercpuname[] = {
"haswell",
"steamroller",
"excavator",
+ "zen",
};
static char *corename[] = {
@@ -1533,6 +1548,7 @@ static char *corename[] = {
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
+ "ZEN",
};
static char *corename_lower[] = {
@@ -1563,6 +1579,7 @@ static char *corename_lower[] = {
"haswell",
"steamroller",
"excavator",
+ "zen",
};
@@ -1776,15 +1793,16 @@ int get_coretype(void){
break;
case 9:
case 8:
- if (model == 14) // Kaby Lake
+ if (model == 14) { // Kaby Lake
if(support_avx())
#ifndef NO_AVX2
- return CORE_HASWELL;
+ return CORE_HASWELL;
#else
- return CORE_SANDYBRIDGE;
+ return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
+ }
}
break;
@@ -1841,9 +1859,22 @@ int get_coretype(void){
}
break;
}
-
-
- }else return CORE_BARCELONA;
+ } else if (exfamily == 8) {
+ switch (model) {
+ case 1:
+ // AMD Ryzen
+ if(support_avx())
+#ifndef NO_AVX2
+ return CORE_ZEN;
+#else
+ return CORE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
+#endif
+ else
+ return CORE_BARCELONA;
+ }
+ } else {
+ return CORE_BARCELONA;
+ }
}
}
diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c
index 13efe20ac..54b18f245 100644
--- a/driver/others/dynamic.c
+++ b/driver/others/dynamic.c
@@ -70,8 +70,10 @@ extern gotoblas_t gotoblas_STEAMROLLER;
extern gotoblas_t gotoblas_EXCAVATOR;
#ifdef NO_AVX2
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
+#define gotoblas_ZEN gotoblas_SANDYBRIDGE
#else
extern gotoblas_t gotoblas_HASWELL;
+extern gotoblas_t gotoblas_ZEN;
#endif
#else
//Use NEHALEM kernels for sandy bridge
@@ -81,6 +83,7 @@ extern gotoblas_t gotoblas_HASWELL;
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
#define gotoblas_EXCAVATOR gotoblas_BARCELONA
+#define gotoblas_ZEN gotoblas_BARCELONA
#endif
@@ -355,14 +358,14 @@ static gotoblas_t *get_coretype(void){
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
- }else if(model == 5){
- if(support_avx())
- return &gotoblas_EXCAVATOR;
- else{
- openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
- return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
- }
- }else if(model == 0){
+ }else if(model == 5){
+ if(support_avx())
+ return &gotoblas_EXCAVATOR;
+ else{
+ openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
+ return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
+ }
+ }else if(model == 0){
if (exmodel == 1) {
//AMD Trinity
if(support_avx())
@@ -389,9 +392,16 @@ static gotoblas_t *get_coretype(void){
}
}
-
-
- } else {
+ } else if (exfamily == 8) { // AMD Ryzen: same exfamily test as get_coretype() in cpuid_x86.c
+ if (model == 1) {
+ if(support_avx())
+ return &gotoblas_ZEN; // aliased to SANDYBRIDGE/BARCELONA tables when AVX2 kernels are not built
+ else{
+ openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
+ return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
+ }
+ }
+ }else {
return &gotoblas_BARCELONA;
}
}
@@ -431,6 +441,7 @@ static char *corename[] = {
"Haswell",
"Steamroller",
"Excavator",
+ "Zen"
};
char *gotoblas_corename(void) {
@@ -457,6 +468,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_HASWELL) return corename[20];
if (gotoblas == &gotoblas_STEAMROLLER) return corename[21];
if (gotoblas == &gotoblas_EXCAVATOR) return corename[22];
+ if (gotoblas == &gotoblas_ZEN) return corename[23];
return corename[0];
}
@@ -487,6 +499,7 @@ static gotoblas_t *force_coretype(char *coretype){
switch (found)
{
+ case 23: return (&gotoblas_ZEN);
case 22: return (&gotoblas_EXCAVATOR);
case 21: return (&gotoblas_STEAMROLLER);
case 20: return (&gotoblas_HASWELL);
diff --git a/driver/others/parameter.c b/driver/others/parameter.c
index c33c0eaa6..31a48644f 100644
--- a/driver/others/parameter.c
+++ b/driver/others/parameter.c
@@ -167,7 +167,7 @@ int get_L2_size(void){
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
- defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR)
+ defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
@@ -251,7 +251,7 @@ int get_L2_size(void){
void blas_set_parameter(void){
int factor;
-#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR)
+#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
int size = 16;
#else
int size = get_L2_size();
diff --git a/getarch.c b/getarch.c
index 897f272ad..728655b88 100644
--- a/getarch.c
+++ b/getarch.c
@@ -473,6 +473,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "EXCAVATOR"
#endif
+#if defined (FORCE_ZEN) /* explicit build target: AMD Zen (Ryzen) */
+#define FORCE
+#define FORCE_INTEL
+#define ARCHITECTURE "X86"
+#define SUBARCHITECTURE "ZEN"
+#define ARCHCONFIG "-DZEN " \
+ "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
+ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL2_CODE_ASSOCIATIVE=8 " \
+ "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+ "-DL3_SIZE=16777216 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=8 " \
+ "-DITB_DEFAULT_ENTRIES=64 -DITB_SIZE=4096 " \
+ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+ "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
+ "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
+ "-DHAVE_AVX -DHAVE_FMA3 -DFMA3"
+#define LIBNAME "zen" /* must agree with SUBARCHITECTURE and the "zen" name-table entries */
+#define CORENAME "ZEN" /* must agree with -DZEN above and CORE == ZEN in kernel/Makefile.L3 */
+#endif
+
#ifdef FORCE_SSE_GENERIC
#define FORCE
diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt
index 17c2b1b89..8bfcccf17 100644
--- a/kernel/CMakeLists.txt
+++ b/kernel/CMakeLists.txt
@@ -118,7 +118,7 @@ endforeach ()
# Makefile.L3
set(USE_TRMM false)
-if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell")
+if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen")
set(USE_TRMM true)
endif ()
diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index 86e692e5c..066426396 100644
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3
@@ -32,6 +32,10 @@ ifeq ($(CORE), HASWELL)
USE_TRMM = 1
endif
+ifeq ($(CORE), ZEN)
+USE_TRMM = 1
+endif
+
ifeq ($(CORE), POWER8)
USE_TRMM = 1
endif
diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index a0a221695..9320cb56c 100644
--- a/kernel/setparam-ref.c
+++ b/kernel/setparam-ref.c
@@ -982,6 +982,22 @@ static void init_parameter(void) {
#endif
#endif
+#ifdef ZEN
+
+#ifdef DEBUG
+ fprintf(stderr, "Zen\n");
+#endif
+
+ TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
+ TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
+ TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
+ TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
+#ifdef EXPRECISION
+ TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
+ TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
+#endif
+#endif
+
#ifdef NANO
diff --git a/kernel/x86/KERNEL.ZEN b/kernel/x86/KERNEL.ZEN
new file mode 100644
index 000000000..d9abfc78a
--- /dev/null
+++ b/kernel/x86/KERNEL.ZEN
@@ -0,0 +1 @@
+include $(KERNELDIR)/KERNEL.BARCELONA
diff --git a/kernel/x86_64/KERNEL.ZEN b/kernel/x86_64/KERNEL.ZEN
new file mode 100644
index 000000000..f2e1374d3
--- /dev/null
+++ b/kernel/x86_64/KERNEL.ZEN
@@ -0,0 +1,98 @@
+DSCALKERNEL = dscal.c
+CSCALKERNEL = cscal.c
+ZSCALKERNEL = zscal.c
+
+SGEMVNKERNEL = sgemv_n_4.c
+SGEMVTKERNEL = sgemv_t_4.c
+
+DGEMVNKERNEL = dgemv_n_4.c
+DGEMVTKERNEL = dgemv_t_4.c
+
+ZGEMVNKERNEL = zgemv_n_4.c
+ZGEMVTKERNEL = zgemv_t_4.c
+
+CGEMVNKERNEL = cgemv_n_4.c
+CGEMVTKERNEL = cgemv_t_4.c
+
+SSYMV_L_KERNEL = ssymv_L.c
+SSYMV_U_KERNEL = ssymv_U.c
+DSYMV_L_KERNEL = dsymv_L.c
+DSYMV_U_KERNEL = dsymv_U.c
+
+SDOTKERNEL = sdot.c
+DDOTKERNEL = ddot.c
+CDOTKERNEL = cdot.c
+ZDOTKERNEL = zdot.c
+
+SAXPYKERNEL = saxpy.c
+DAXPYKERNEL = daxpy.c
+CAXPYKERNEL = caxpy.c
+ZAXPYKERNEL = zaxpy.c
+
+STRMMKERNEL = sgemm_kernel_16x4_haswell.S
+SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
+SGEMMINCOPY = ../generic/gemm_ncopy_16.c
+SGEMMITCOPY = ../generic/gemm_tcopy_16.c
+SGEMMONCOPY = ../generic/gemm_ncopy_4.c
+SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
+SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
+SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
+SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
+SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+DTRMMKERNEL = dtrmm_kernel_4x8_haswell.c
+DGEMMKERNEL = dgemm_kernel_4x8_haswell.S
+DGEMMINCOPY = ../generic/gemm_ncopy_4.c
+DGEMMITCOPY = ../generic/gemm_tcopy_4.c
+DGEMMONCOPY = ../generic/gemm_ncopy_8.c
+DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
+DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
+DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+CTRMMKERNEL = cgemm_kernel_8x2_haswell.S
+CGEMMKERNEL = cgemm_kernel_8x2_haswell.S
+CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
+CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
+CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
+CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
+CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+ZTRMMKERNEL = zgemm_kernel_4x2_haswell.S
+ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S
+ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c
+ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c
+ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
+ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
+ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
+ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = dtrsm_kernel_RN_haswell.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S
+ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S
+
diff --git a/kernel/x86_64/caxpy.c b/kernel/x86_64/caxpy.c
index 5af9b8fcc..b1ec19bd3 100644
--- a/kernel/x86_64/caxpy.c
+++ b/kernel/x86_64/caxpy.c
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "caxpy_microk_steamroller-2.c"
#elif defined(BULLDOZER)
#include "caxpy_microk_bulldozer-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "caxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "caxpy_microk_sandy-2.c"
diff --git a/kernel/x86_64/cdot.c b/kernel/x86_64/cdot.c
index 4d33b6f5c..ce396a2ce 100644
--- a/kernel/x86_64/cdot.c
+++ b/kernel/x86_64/cdot.c
@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "cdot_microk_steamroller-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "cdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "cdot_microk_sandy-2.c"
diff --git a/kernel/x86_64/cgemv_n_4.c b/kernel/x86_64/cgemv_n_4.c
index 235510534..d0a2c84e2 100644
--- a/kernel/x86_64/cgemv_n_4.c
+++ b/kernel/x86_64/cgemv_n_4.c
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include "common.h"
-#if defined(HASWELL)
+#if defined(HASWELL) || defined(ZEN)
#include "cgemv_n_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_n_microk_bulldozer-4.c"
diff --git a/kernel/x86_64/cgemv_t_4.c b/kernel/x86_64/cgemv_t_4.c
index 1a714f61f..3dc19dc4e 100644
--- a/kernel/x86_64/cgemv_t_4.c
+++ b/kernel/x86_64/cgemv_t_4.c
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
-#if defined(HASWELL)
+#if defined(HASWELL) || defined(ZEN)
#include "cgemv_t_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_t_microk_bulldozer-4.c"
diff --git a/kernel/x86_64/cscal.c b/kernel/x86_64/cscal.c
index c44d12e3d..9b9179da0 100644
--- a/kernel/x86_64/cscal.c
+++ b/kernel/x86_64/cscal.c
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
-#if defined(HASWELL)
+#if defined(HASWELL) || defined(ZEN)
#include "cscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "cscal_microk_bulldozer-2.c"
diff --git a/kernel/x86_64/daxpy.c b/kernel/x86_64/daxpy.c
index 18569e6e4..4bde62824 100644
--- a/kernel/x86_64/daxpy.c
+++ b/kernel/x86_64/daxpy.c
@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "daxpy_microk_steamroller-2.c"
#elif defined(PILEDRIVER)
#include "daxpy_microk_piledriver-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "daxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "daxpy_microk_sandy-2.c"
diff --git a/kernel/x86_64/ddot.c b/kernel/x86_64/ddot.c
index a45dd7f3b..0a20564cf 100644
--- a/kernel/x86_64/ddot.c
+++ b/kernel/x86_64/ddot.c
@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ddot_microk_piledriver-2.c"
#elif defined(NEHALEM)
#include "ddot_microk_nehalem-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "ddot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ddot_microk_sandy-2.c"
diff --git a/kernel/x86_64/dgemv_n_4.c b/kernel/x86_64/dgemv_n_4.c
index 4200b8acd..f8234fbc1 100644
--- a/kernel/x86_64/dgemv_n_4.c
+++ b/kernel/x86_64/dgemv_n_4.c
@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(NEHALEM)
#include "dgemv_n_microk_nehalem-4.c"
-#elif defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR)
+#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dgemv_n_microk_haswell-4.c"
#endif
diff --git a/kernel/x86_64/dgemv_t_4.c b/kernel/x86_64/dgemv_t_4.c
index 42f11f39a..3891e16cc 100644
--- a/kernel/x86_64/dgemv_t_4.c
+++ b/kernel/x86_64/dgemv_t_4.c
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
-#if defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR)
+#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dgemv_t_microk_haswell-4.c"
#endif
diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c
index b6bde6c9f..78ad52179 100644
--- a/kernel/x86_64/dscal.c
+++ b/kernel/x86_64/dscal.c
@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dscal_microk_bulldozer-2.c"
#elif defined(SANDYBRIDGE)
#include "dscal_microk_sandy-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "dscal_microk_haswell-2.c"
#endif
diff --git a/kernel/x86_64/dsymv_L.c b/kernel/x86_64/dsymv_L.c
index e10784ad7..3e8db3fa3 100644
--- a/kernel/x86_64/dsymv_L.c
+++ b/kernel/x86_64/dsymv_L.c
@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_L_microk_bulldozer-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "dsymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "dsymv_L_microk_sandy-2.c"
diff --git a/kernel/x86_64/dsymv_U.c b/kernel/x86_64/dsymv_U.c
index bd07ce2c3..226458bf7 100644
--- a/kernel/x86_64/dsymv_U.c
+++ b/kernel/x86_64/dsymv_U.c
@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_U_microk_bulldozer-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "dsymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "dsymv_U_microk_sandy-2.c"
diff --git a/kernel/x86_64/saxpy.c b/kernel/x86_64/saxpy.c
index b9e5d5784..d89fe408a 100644
--- a/kernel/x86_64/saxpy.c
+++ b/kernel/x86_64/saxpy.c
@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(NEHALEM)
#include "saxpy_microk_nehalem-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "saxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "saxpy_microk_sandy-2.c"
diff --git a/kernel/x86_64/sdot.c b/kernel/x86_64/sdot.c
index d9fc417a0..389252f8b 100644
--- a/kernel/x86_64/sdot.c
+++ b/kernel/x86_64/sdot.c
@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sdot_microk_steamroller-2.c"
#elif defined(NEHALEM)
#include "sdot_microk_nehalem-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "sdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "sdot_microk_sandy-2.c"
diff --git a/kernel/x86_64/sgemv_n_4.c b/kernel/x86_64/sgemv_n_4.c
index bdf68dd07..7c091c765 100644
--- a/kernel/x86_64/sgemv_n_4.c
+++ b/kernel/x86_64/sgemv_n_4.c
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_n_microk_nehalem-4.c"
#elif defined(SANDYBRIDGE)
#include "sgemv_n_microk_sandy-4.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "sgemv_n_microk_haswell-4.c"
#endif
diff --git a/kernel/x86_64/sgemv_t_4.c b/kernel/x86_64/sgemv_t_4.c
index 62550e65c..6f9c7caa0 100644
--- a/kernel/x86_64/sgemv_t_4.c
+++ b/kernel/x86_64/sgemv_t_4.c
@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_t_microk_bulldozer-4.c"
#elif defined(SANDYBRIDGE)
#include "sgemv_t_microk_sandy-4.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "sgemv_t_microk_haswell-4.c"
#endif
diff --git a/kernel/x86_64/ssymv_L.c b/kernel/x86_64/ssymv_L.c
index 3813981ed..199d8a517 100644
--- a/kernel/x86_64/ssymv_L.c
+++ b/kernel/x86_64/ssymv_L.c
@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_L_microk_bulldozer-2.c"
#elif defined(NEHALEM)
#include "ssymv_L_microk_nehalem-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "ssymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ssymv_L_microk_sandy-2.c"
diff --git a/kernel/x86_64/ssymv_U.c b/kernel/x86_64/ssymv_U.c
index e4d3c9b30..104b29355 100644
--- a/kernel/x86_64/ssymv_U.c
+++ b/kernel/x86_64/ssymv_U.c
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_U_microk_bulldozer-2.c"
#elif defined(NEHALEM)
#include "ssymv_U_microk_nehalem-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "ssymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ssymv_U_microk_sandy-2.c"
diff --git a/kernel/x86_64/symv_L_sse.S b/kernel/x86_64/symv_L_sse.S
index cda0b476d..8cae3fc1b 100644
--- a/kernel/x86_64/symv_L_sse.S
+++ b/kernel/x86_64/symv_L_sse.S
@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
diff --git a/kernel/x86_64/symv_L_sse2.S b/kernel/x86_64/symv_L_sse2.S
index 0afc1e8c0..d7091624d 100644
--- a/kernel/x86_64/symv_L_sse2.S
+++ b/kernel/x86_64/symv_L_sse2.S
@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
diff --git a/kernel/x86_64/symv_U_sse.S b/kernel/x86_64/symv_U_sse.S
index 691012cb1..3549b9863 100644
--- a/kernel/x86_64/symv_U_sse.S
+++ b/kernel/x86_64/symv_U_sse.S
@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)
diff --git a/kernel/x86_64/symv_U_sse2.S b/kernel/x86_64/symv_U_sse2.S
index 8ecbb39e6..882b035a9 100644
--- a/kernel/x86_64/symv_U_sse2.S
+++ b/kernel/x86_64/symv_U_sse2.S
@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
diff --git a/kernel/x86_64/zaxpy.c b/kernel/x86_64/zaxpy.c
index 0cd555a68..8cb1d532f 100644
--- a/kernel/x86_64/zaxpy.c
+++ b/kernel/x86_64/zaxpy.c
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zaxpy_microk_bulldozer-2.c"
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zaxpy_microk_steamroller-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "zaxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "zaxpy_microk_sandy-2.c"
diff --git a/kernel/x86_64/zdot.c b/kernel/x86_64/zdot.c
index c1399be37..2fcacc87a 100644
--- a/kernel/x86_64/zdot.c
+++ b/kernel/x86_64/zdot.c
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "zdot_microk_steamroller-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "zdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "zdot_microk_sandy-2.c"
diff --git a/kernel/x86_64/zgemv_n_4.c b/kernel/x86_64/zgemv_n_4.c
index 4171fc99f..1d0f1e8f7 100644
--- a/kernel/x86_64/zgemv_n_4.c
+++ b/kernel/x86_64/zgemv_n_4.c
@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
-#if defined(HASWELL)
+#if defined(HASWELL) || defined(ZEN)
#include "zgemv_n_microk_haswell-4.c"
#elif defined(SANDYBRIDGE)
#include "zgemv_n_microk_sandy-4.c"
diff --git a/kernel/x86_64/zgemv_t_4.c b/kernel/x86_64/zgemv_t_4.c
index 0524c71f7..20ccf06f7 100644
--- a/kernel/x86_64/zgemv_t_4.c
+++ b/kernel/x86_64/zgemv_t_4.c
@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zgemv_t_microk_bulldozer-4.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "zgemv_t_microk_haswell-4.c"
#endif
diff --git a/kernel/x86_64/zscal.c b/kernel/x86_64/zscal.c
index 7ca8774b7..aa5d8fac0 100644
--- a/kernel/x86_64/zscal.c
+++ b/kernel/x86_64/zscal.c
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
-#if defined(HASWELL)
+#if defined(HASWELL) || defined(ZEN)
#include "zscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "zscal_microk_bulldozer-2.c"
diff --git a/kernel/x86_64/zsymv_L_sse.S b/kernel/x86_64/zsymv_L_sse.S
index 3a5243bab..dd95eea17 100644
--- a/kernel/x86_64/zsymv_L_sse.S
+++ b/kernel/x86_64/zsymv_L_sse.S
@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
diff --git a/kernel/x86_64/zsymv_L_sse2.S b/kernel/x86_64/zsymv_L_sse2.S
index 295ab1a83..75124cf3e 100644
--- a/kernel/x86_64/zsymv_L_sse2.S
+++ b/kernel/x86_64/zsymv_L_sse2.S
@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
diff --git a/kernel/x86_64/zsymv_U_sse.S b/kernel/x86_64/zsymv_U_sse.S
index cf302e4ed..db1a4ff5f 100644
--- a/kernel/x86_64/zsymv_U_sse.S
+++ b/kernel/x86_64/zsymv_U_sse.S
@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
diff --git a/kernel/x86_64/zsymv_U_sse2.S b/kernel/x86_64/zsymv_U_sse2.S
index 7c290137d..599765a6d 100644
--- a/kernel/x86_64/zsymv_U_sse2.S
+++ b/kernel/x86_64/zsymv_U_sse2.S
@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif
-#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
+#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)
diff --git a/param.h b/param.h
index 8e5a3544e..78cb86c43 100644
--- a/param.h
+++ b/param.h
@@ -595,6 +595,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
+#ifdef ZEN
+#define SNUMOPT 8
+#define DNUMOPT 4
+
+#define GEMM_DEFAULT_OFFSET_A 64
+#define GEMM_DEFAULT_OFFSET_B 832
+#define GEMM_DEFAULT_ALIGN 0x0fffUL
+
+#define QGEMM_DEFAULT_UNROLL_N 2
+#define CGEMM_DEFAULT_UNROLL_N 2
+#define ZGEMM_DEFAULT_UNROLL_N 2
+#define XGEMM_DEFAULT_UNROLL_N 1
+
+#ifdef ARCH_X86 /* 32-bit: kernel/x86/KERNEL.ZEN includes KERNEL.BARCELONA */
+#define SGEMM_DEFAULT_UNROLL_N 4
+#define DGEMM_DEFAULT_UNROLL_N 4
+#define SGEMM_DEFAULT_UNROLL_M 4
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define QGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_M 2
+#define ZGEMM_DEFAULT_UNROLL_M 1
+#define XGEMM_DEFAULT_UNROLL_M 1
+#else /* 64-bit: factors must match the kernels named in kernel/x86_64/KERNEL.ZEN */
+#define SGEMM_DEFAULT_UNROLL_N 4 /* sgemm_kernel_16x4_haswell.S, gemm_ncopy_4 */
+#define DGEMM_DEFAULT_UNROLL_N 8 /* dgemm_kernel_4x8_haswell.S, gemm_ncopy_8 */
+#define SGEMM_DEFAULT_UNROLL_M 16 /* sgemm_kernel_16x4_haswell.S, gemm_ncopy_16 */
+#define DGEMM_DEFAULT_UNROLL_M 4 /* dgemm_kernel_4x8_haswell.S, gemm_ncopy_4 */
+#define QGEMM_DEFAULT_UNROLL_M 2
+#define CGEMM_DEFAULT_UNROLL_M 8 /* cgemm_kernel_8x2_haswell.S, zgemm_ncopy_8 */
+#define ZGEMM_DEFAULT_UNROLL_M 4 /* zgemm_kernel_4x2_haswell.S, zgemm_ncopy_4 */
+#define XGEMM_DEFAULT_UNROLL_M 1
+#define CGEMM3M_DEFAULT_UNROLL_N 8 /* zgemm3m_kernel_4x8_nehalem.S */
+#define CGEMM3M_DEFAULT_UNROLL_M 4 /* zgemm3m_kernel_4x8_nehalem.S */
+#define ZGEMM3M_DEFAULT_UNROLL_N 8 /* zgemm3m_kernel_2x8_nehalem.S */
+#define ZGEMM3M_DEFAULT_UNROLL_M 2 /* zgemm3m_kernel_2x8_nehalem.S */
+#define GEMV_UNROLL 8
+#endif
+
+#if defined(ARCH_X86_64)
+#define SGEMM_DEFAULT_P 768
+#define DGEMM_DEFAULT_P 576
+#define ZGEMM_DEFAULT_P 288
+#define CGEMM_DEFAULT_P 576
+#else
+#define SGEMM_DEFAULT_P 448
+#define DGEMM_DEFAULT_P 480
+#define ZGEMM_DEFAULT_P 112
+#define CGEMM_DEFAULT_P 224
+#endif
+#define QGEMM_DEFAULT_P 112
+#define XGEMM_DEFAULT_P 56
+
+#if defined(ARCH_X86_64)
+#define SGEMM_DEFAULT_Q 192
+#define DGEMM_DEFAULT_Q 160
+#define ZGEMM_DEFAULT_Q 160
+#define CGEMM_DEFAULT_Q 160
+#else
+#define SGEMM_DEFAULT_Q 224
+#define DGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 224
+#define CGEMM_DEFAULT_Q 224
+#endif
+#define QGEMM_DEFAULT_Q 224
+#define XGEMM_DEFAULT_Q 224
+
+#define CGEMM3M_DEFAULT_P 448
+#define ZGEMM3M_DEFAULT_P 224
+#define XGEMM3M_DEFAULT_P 112
+#define CGEMM3M_DEFAULT_Q 224
+#define ZGEMM3M_DEFAULT_Q 224
+#define XGEMM3M_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_R 12288
+#define ZGEMM3M_DEFAULT_R 12288
+#define XGEMM3M_DEFAULT_R 12288
+
+#define SGEMM_DEFAULT_R 12288
+#define QGEMM_DEFAULT_R qgemm_r
+#define DGEMM_DEFAULT_R 12288
+#define CGEMM_DEFAULT_R cgemm_r
+#define ZGEMM_DEFAULT_R zgemm_r
+#define XGEMM_DEFAULT_R xgemm_r
+
+#define SYMV_P 16
+#define HAVE_EXCLUSIVE_CACHE
+
+#define GEMM_THREAD gemm_thread_mn
+
+#endif
+
#ifdef ATHLON
#define SNUMOPT 4