summary | refs | log | tree | commit | diff
path: root/common_power.h
diff options
context:
space:
mode:
author AbdelRauf <quickwritereader@gmail.com> 2019-03-14 10:42:04 +0000
committer AbdelRauf <quickwritereader@gmail.com> 2019-03-29 15:49:40 +0000
commit 853a18bc17628fb1e8615503304ceedef9d45030 (patch)
tree 8687c346f2d3c140a2670c55bdc9967915445c78 /common_power.h
parent 498ac98581accf80085c020874ad6a9513f95996 (diff)
download openblas-853a18bc17628fb1e8615503304ceedef9d45030.tar.gz
openblas-853a18bc17628fb1e8615503304ceedef9d45030.tar.bz2
openblas-853a18bc17628fb1e8615503304ceedef9d45030.zip
Add POWER9 makefile. The dgemm kernel is based on the POWER8 kernel, with the following changes: 32x-unrolled 16x4 and 8x4 kernels using lxv, stxv, and a butterfly rank-1 update. Performance improves from 17 to 22-23 GFLOPS. The dtrmm cases were added into dgemm itself.
Diffstat (limited to 'common_power.h')
-rw-r--r-- common_power.h 8
1 file changed, 4 insertions, 4 deletions
diff --git a/common_power.h b/common_power.h
index e3a1a7aef..ddbee9412 100644
--- a/common_power.h
+++ b/common_power.h
@@ -39,7 +39,7 @@
#ifndef COMMON_POWER
#define COMMON_POWER
-#if defined(POWER8)
+#if defined(POWER8) || defined(POWER9)
#define MB __asm__ __volatile__ ("eieio":::"memory")
#define WMB __asm__ __volatile__ ("eieio":::"memory")
#else
@@ -241,7 +241,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define HAVE_PREFETCH
#endif
-#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8)
+#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9)
#define DCBT_ARG 0
#else
#define DCBT_ARG 8
@@ -263,7 +263,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define L1_PREFETCH dcbtst
#endif
-#if defined(POWER8)
+#if defined(POWER8) || defined(POWER9)
#define L1_DUALFETCH
#define L1_PREFETCHSIZE (16 + 128 * 100)
#define L1_PREFETCH dcbtst
@@ -802,7 +802,7 @@ Lmcount$lazy_ptr:
#define BUFFER_SIZE ( 2 << 20)
#elif defined(PPC440FP2)
#define BUFFER_SIZE ( 16 << 20)
-#elif defined(POWER8)
+#elif defined(POWER8) || defined(POWER9)
#define BUFFER_SIZE ( 64 << 20)
#else
#define BUFFER_SIZE ( 16 << 20)