diff options
author | AbdelRauf <quickwritereader@gmail.com> | 2019-03-14 10:42:04 +0000 |
---|---|---|
committer | AbdelRauf <quickwritereader@gmail.com> | 2019-03-29 15:49:40 +0000 |
commit | 853a18bc17628fb1e8615503304ceedef9d45030 (patch) | |
tree | 8687c346f2d3c140a2670c55bdc9967915445c78 /common_power.h | |
parent | 498ac98581accf80085c020874ad6a9513f95996 (diff) | |
download | openblas-853a18bc17628fb1e8615503304ceedef9d45030.tar.gz openblas-853a18bc17628fb1e8615503304ceedef9d45030.tar.bz2 openblas-853a18bc17628fb1e8615503304ceedef9d45030.zip |
POWER9 makefile. DGEMM is based on the POWER8 kernel with the following changes: a 32x-unrolled 16x4 kernel and an 8x4 kernel, both using lxv/stxv and a butterfly rank-1 update. Performance improves from 17 to 22-23 GFLOPS. The DTRMM cases were added into DGEMM itself.
Diffstat (limited to 'common_power.h')
-rw-r--r-- | common_power.h | 8 |
1 file changed, 4 insertions, 4 deletions
```diff
diff --git a/common_power.h b/common_power.h
index e3a1a7aef..ddbee9412 100644
--- a/common_power.h
+++ b/common_power.h
@@ -39,7 +39,7 @@
 #ifndef COMMON_POWER
 #define COMMON_POWER
 
-#if defined(POWER8)
+#if defined(POWER8) || defined(POWER9)
 #define MB __asm__ __volatile__ ("eieio":::"memory")
 #define WMB __asm__ __volatile__ ("eieio":::"memory")
 #else
@@ -241,7 +241,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
 #define HAVE_PREFETCH
 #endif
 
-#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8)
+#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9)
 #define DCBT_ARG 0
 #else
 #define DCBT_ARG 8
@@ -263,7 +263,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
 #define L1_PREFETCH dcbtst
 #endif
 
-#if defined(POWER8)
+#if defined(POWER8) || defined(POWER9)
 #define L1_DUALFETCH
 #define L1_PREFETCHSIZE (16 + 128 * 100)
 #define L1_PREFETCH dcbtst
@@ -802,7 +802,7 @@ Lmcount$lazy_ptr:
 #define BUFFER_SIZE ( 2 << 20)
 #elif defined(PPC440FP2)
 #define BUFFER_SIZE ( 16 << 20)
-#elif defined(POWER8)
+#elif defined(POWER8) || defined(POWER9)
 #define BUFFER_SIZE ( 64 << 20)
 #else
 #define BUFFER_SIZE ( 16 << 20)
```