diff options
author | wernsaar <wernsaar@googlemail.com> | 2014-09-20 14:27:10 +0200 |
---|---|---|
committer | wernsaar <wernsaar@googlemail.com> | 2014-09-20 14:27:10 +0200 |
commit | 7aae4a62e78daa586774248aa31679311d7bd5cc (patch) | |
tree | 8d6cb41e5199b789819f47c173b11ee4a6e4a19f | |
parent | 7a911569b8502d97075bb63ebcbfed18bc13bc97 (diff) | |
download | openblas-7aae4a62e78daa586774248aa31679311d7bd5cc.tar.gz openblas-7aae4a62e78daa586774248aa31679311d7bd5cc.tar.bz2 openblas-7aae4a62e78daa586774248aa31679311d7bd5cc.zip |
enabled use of GEMM3M functions
-rw-r--r-- | common_param.h | 109 | ||||
-rw-r--r-- | driver/level3/Makefile | 4 | ||||
-rw-r--r-- | exports/gensymbol | 4 | ||||
-rw-r--r-- | interface/Makefile | 4 | ||||
-rw-r--r-- | kernel/setparam-ref.c | 90 | ||||
-rw-r--r-- | param.h | 48 |
6 files changed, 235 insertions, 24 deletions
diff --git a/common_param.h b/common_param.h index 1c362e8cb..e9f35c033 100644 --- a/common_param.h +++ b/common_param.h @@ -435,6 +435,9 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG); int (*chemm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); int (*chemm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); + int cgemm3m_p, cgemm3m_q, cgemm3m_r; + int cgemm3m_unroll_m, cgemm3m_unroll_n, cgemm3m_unroll_mn; + int (*cgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG); int (*cgemm3m_incopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float *); @@ -595,6 +598,9 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG); int (*zhemm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *); int (*zhemm_oltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *); + int zgemm3m_p, zgemm3m_q, zgemm3m_r; + int zgemm3m_unroll_m, zgemm3m_unroll_n, zgemm3m_unroll_mn; + int (*zgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG); int (*zgemm3m_incopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double *); @@ -757,6 +763,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); int (*xhemm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *); int (*xhemm_oltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *); + int xgemm3m_p, xgemm3m_q, xgemm3m_r; + int xgemm3m_unroll_m, xgemm3m_unroll_n, xgemm3m_unroll_mn; + int (*xgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG); int (*xgemm3m_incopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *); @@ -900,6 +909,27 @@ extern gotoblas_t *gotoblas; #define XGEMM_UNROLL_N gotoblas -> xgemm_unroll_n #define XGEMM_UNROLL_MN gotoblas -> xgemm_unroll_mn +#define CGEMM3M_P gotoblas -> cgemm3m_p +#define CGEMM3M_Q gotoblas -> cgemm3m_q +#define CGEMM3M_R gotoblas -> cgemm3m_r +#define CGEMM3M_UNROLL_M gotoblas -> cgemm3m_unroll_m +#define CGEMM3M_UNROLL_N gotoblas -> cgemm3m_unroll_n +#define CGEMM3M_UNROLL_MN gotoblas -> cgemm3m_unroll_mn + +#define ZGEMM3M_P gotoblas -> zgemm3m_p +#define ZGEMM3M_Q gotoblas -> zgemm3m_q +#define ZGEMM3M_R gotoblas -> zgemm3m_r +#define ZGEMM3M_UNROLL_M gotoblas -> zgemm3m_unroll_m +#define ZGEMM3M_UNROLL_N gotoblas -> zgemm3m_unroll_n +#define ZGEMM3M_UNROLL_MN gotoblas -> zgemm3m_unroll_mn + +#define XGEMM3M_P gotoblas -> xgemm3m_p +#define XGEMM3M_Q gotoblas -> xgemm3m_q +#define XGEMM3M_R gotoblas -> xgemm3m_r +#define XGEMM3M_UNROLL_M gotoblas -> xgemm3m_unroll_m +#define XGEMM3M_UNROLL_N gotoblas -> xgemm3m_unroll_n +#define XGEMM3M_UNROLL_MN gotoblas -> xgemm3m_unroll_mn + #else #define DTB_ENTRIES DTB_DEFAULT_ENTRIES @@ -972,6 +1002,55 @@ extern gotoblas_t *gotoblas; #define XGEMM_UNROLL_N XGEMM_DEFAULT_UNROLL_N #define XGEMM_UNROLL_MN MAX((XGEMM_UNROLL_M), (XGEMM_UNROLL_N)) +#ifdef CGEMM_DEFAULT_UNROLL_N + +#define CGEMM3M_P CGEMM3M_DEFAULT_P +#define CGEMM3M_Q CGEMM3M_DEFAULT_Q +#define CGEMM3M_R CGEMM3M_DEFAULT_R +#define CGEMM3M_UNROLL_M CGEMM3M_DEFAULT_UNROLL_M +#define CGEMM3M_UNROLL_N CGEMM3M_DEFAULT_UNROLL_N +#define CGEMM3M_UNROLL_MN MAX((CGEMM3M_UNROLL_M), (CGEMM3M_UNROLL_N)) + +#else + +#define CGEMM3M_P SGEMM_DEFAULT_P +#define CGEMM3M_Q SGEMM_DEFAULT_Q +#define CGEMM3M_R SGEMM_DEFAULT_R +#define CGEMM3M_UNROLL_M SGEMM_DEFAULT_UNROLL_M +#define CGEMM3M_UNROLL_N SGEMM_DEFAULT_UNROLL_N +#define CGEMM3M_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N)) + +#endif + + +#ifdef ZGEMM_DEFAULT_UNROLL_N + +#define ZGEMM3M_P ZGEMM3M_DEFAULT_P +#define ZGEMM3M_Q ZGEMM3M_DEFAULT_Q +#define ZGEMM3M_R ZGEMM3M_DEFAULT_R +#define ZGEMM3M_UNROLL_M ZGEMM3M_DEFAULT_UNROLL_M +#define ZGEMM3M_UNROLL_N ZGEMM3M_DEFAULT_UNROLL_N +#define ZGEMM3M_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N)) + +#else + +#define ZGEMM3M_P DGEMM_DEFAULT_P +#define ZGEMM3M_Q DGEMM_DEFAULT_Q +#define ZGEMM3M_R DGEMM_DEFAULT_R +#define ZGEMM3M_UNROLL_M DGEMM_DEFAULT_UNROLL_M +#define ZGEMM3M_UNROLL_N DGEMM_DEFAULT_UNROLL_N +#define ZGEMM3M_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N)) + +#endif + +#define XGEMM3M_P QGEMM_DEFAULT_P +#define XGEMM3M_Q QGEMM_DEFAULT_Q +#define XGEMM3M_R QGEMM_DEFAULT_R +#define XGEMM3M_UNROLL_M QGEMM_DEFAULT_UNROLL_M +#define XGEMM3M_UNROLL_N QGEMM_DEFAULT_UNROLL_N +#define XGEMM3M_UNROLL_MN MAX((QGEMM_UNROLL_M), (QGEMM_UNROLL_N)) + + #endif #endif @@ -1054,14 +1133,14 @@ extern gotoblas_t *gotoblas; #endif #ifdef XDOUBLE -#define GEMM3M_UNROLL_M QGEMM_UNROLL_M -#define GEMM3M_UNROLL_N QGEMM_UNROLL_N +#define GEMM3M_UNROLL_M XGEMM3M_UNROLL_M +#define GEMM3M_UNROLL_N XGEMM3M_UNROLL_N #elif defined(DOUBLE) -#define GEMM3M_UNROLL_M DGEMM_UNROLL_M -#define GEMM3M_UNROLL_N DGEMM_UNROLL_N +#define GEMM3M_UNROLL_M ZGEMM3M_UNROLL_M +#define GEMM3M_UNROLL_N ZGEMM3M_UNROLL_N #else -#define GEMM3M_UNROLL_M SGEMM_UNROLL_M -#define GEMM3M_UNROLL_N SGEMM_UNROLL_N +#define GEMM3M_UNROLL_M CGEMM3M_UNROLL_M +#define GEMM3M_UNROLL_N CGEMM3M_UNROLL_N #endif @@ -1123,31 +1202,31 @@ extern gotoblas_t *gotoblas; #ifndef GEMM3M_P #ifdef XDOUBLE -#define GEMM3M_P QGEMM_P +#define GEMM3M_P XGEMM3M_P #elif defined(DOUBLE) -#define GEMM3M_P DGEMM_P +#define GEMM3M_P ZGEMM3M_P #else -#define GEMM3M_P SGEMM_P +#define GEMM3M_P CGEMM3M_P #endif #endif #ifndef GEMM3M_Q #ifdef XDOUBLE -#define GEMM3M_Q QGEMM_Q +#define GEMM3M_Q XGEMM3M_Q #elif defined(DOUBLE) -#define GEMM3M_Q DGEMM_Q +#define GEMM3M_Q ZGEMM3M_Q #else -#define GEMM3M_Q SGEMM_Q +#define GEMM3M_Q CGEMM3M_Q #endif #endif #ifndef GEMM3M_R #ifdef XDOUBLE -#define GEMM3M_R QGEMM_R +#define GEMM3M_R XGEMM3M_R #elif defined(DOUBLE) -#define GEMM3M_R DGEMM_R +#define GEMM3M_R ZGEMM3M_R #else -#define GEMM3M_R SGEMM_R +#define GEMM3M_R CGEMM3M_R #endif #endif diff --git a/driver/level3/Makefile b/driver/level3/Makefile index d62921e84..352225206 100644 --- a/driver/level3/Makefile +++ b/driver/level3/Makefile @@ -4,11 +4,11 @@ include ../../Makefile.system USE_GEMM3M = 0 ifeq ($(ARCH), x86) -USE_GEMM3M = 0 +USE_GEMM3M = 1 endif ifeq ($(ARCH), x86_64) -USE_GEMM3M = 0 +USE_GEMM3M = 1 endif ifeq ($(ARCH), ia64) diff --git a/exports/gensymbol b/exports/gensymbol index e5049678a..69454d71b 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -75,7 +75,9 @@ ); @gemm3mobjs = ( - + cgemm3m,zgemm3m, + chemm3m,zhemm3m, + csymm3m,zsymm3m ); diff --git a/interface/Makefile b/interface/Makefile index cced14fb2..567224119 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -4,11 +4,11 @@ include $(TOPDIR)/Makefile.system SUPPORT_GEMM3M = 0 ifeq ($(ARCH), x86) -SUPPORT_GEMM3M = 0 +SUPPORT_GEMM3M = 1 endif ifeq ($(ARCH), x86_64) -SUPPORT_GEMM3M = 0 +SUPPORT_GEMM3M = 1 endif ifeq ($(ARCH), ia64) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index b1beeae5c..0d7bbd4ac 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -293,6 +293,14 @@ gotoblas_t TABLE_NAME = { #endif chemm_outcopyTS, chemm_oltcopyTS, + 0, 0, 0, +#ifdef CGEMM3M_DEFAULT_UNROLL_M + CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), +#else + SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), +#endif + + cgemm3m_kernelTS, cgemm3m_incopybTS, cgemm3m_incopyrTS, @@ -391,6 +399,14 @@ gotoblas_t TABLE_NAME = { #endif zhemm_outcopyTS, zhemm_oltcopyTS, + 0, 0, 0, +#ifdef ZGEMM3M_DEFAULT_UNROLL_M + ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), +#else + DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), +#endif + + zgemm3m_kernelTS, zgemm3m_incopybTS, zgemm3m_incopyrTS, @@ -486,6 +502,9 @@ gotoblas_t TABLE_NAME = { #endif xhemm_outcopyTS, xhemm_oltcopyTS, + 0, 0, 0, + QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), + xgemm3m_kernelTS, xgemm3m_incopybTS, xgemm3m_incopyrTS, @@ -661,9 +680,23 @@ static void init_parameter(void) { TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; + +#ifdef CGEMM3M_DEFAULT_Q + TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; +#else + TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q; +#endif + +#ifdef ZGEMM3M_DEFAULT_Q + TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; +#else + TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q; +#endif + #ifdef EXPRECISION TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q; + TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q; #endif #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON) @@ -918,20 +951,56 @@ static void init_parameter(void) { TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; + + + #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; #endif + +#endif + + +#ifdef CGEMM3M_DEFAULT_P + TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; +#else + TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; +#endif + +#ifdef ZGEMM3M_DEFAULT_P + TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; +#else + TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; +#endif + +#ifdef EXPRECISION + TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; #endif + TABLE_NAME.sgemm_p = (TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.dgemm_p = (TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.cgemm_p = (TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1) & ~(CGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.zgemm_p = (TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1) & ~(ZGEMM_DEFAULT_UNROLL_M - 1); + +#ifdef CGEMM3M_DEFAULT_UNROLL_M + TABLE_NAME.cgemm3m_p = (TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1) & ~(CGEMM3M_DEFAULT_UNROLL_M - 1); +#else + TABLE_NAME.cgemm3m_p = (TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1); +#endif + +#ifdef ZGEMM3M_DEFAULT_UNROLL_M + TABLE_NAME.zgemm3m_p = (TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1) & ~(ZGEMM3M_DEFAULT_UNROLL_M - 1); +#else + TABLE_NAME.zgemm3m_p = (TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1); +#endif + #ifdef QUAD_PRECISION TABLE_NAME.qgemm_p = (TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1); TABLE_NAME.xgemm_p = (TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1) & ~(XGEMM_DEFAULT_UNROLL_M - 1); + TABLE_NAME.xgemm3m_p = (TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1); #endif #ifdef DEBUG @@ -965,11 +1034,32 @@ static void init_parameter(void) { + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15); + TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE - + ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA + + TABLE_NAME.align) & ~TABLE_NAME.align) + ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15); + + TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE - + ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA + + TABLE_NAME.align) & ~TABLE_NAME.align) + ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15); + + + + #ifdef EXPRECISION TABLE_NAME.xgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15); + + TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE - + ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA + + TABLE_NAME.align) & ~TABLE_NAME.align) + ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15); + #endif + + } @@ -289,6 +289,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_Q 224 #define XGEMM_DEFAULT_Q 224 +#define CGEMM3M_DEFAULT_P 448 +#define ZGEMM3M_DEFAULT_P 224 +#define XGEMM3M_DEFAULT_P 112 +#define CGEMM3M_DEFAULT_Q 224 +#define ZGEMM3M_DEFAULT_Q 224 +#define XGEMM3M_DEFAULT_Q 224 +#define CGEMM3M_DEFAULT_R 12288 +#define ZGEMM3M_DEFAULT_R 12288 +#define XGEMM3M_DEFAULT_R 12288 + #define SGEMM_DEFAULT_R sgemm_r #define QGEMM_DEFAULT_R qgemm_r #define DGEMM_DEFAULT_R dgemm_r @@ -371,6 +381,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define QGEMM_DEFAULT_Q 224 #define XGEMM_DEFAULT_Q 224 +#define CGEMM3M_DEFAULT_P 448 +#define ZGEMM3M_DEFAULT_P 224 +#define XGEMM3M_DEFAULT_P 112 +#define CGEMM3M_DEFAULT_Q 224 +#define ZGEMM3M_DEFAULT_Q 224 +#define XGEMM3M_DEFAULT_Q 224 +#define CGEMM3M_DEFAULT_R 12288 +#define ZGEMM3M_DEFAULT_R 12288 +#define XGEMM3M_DEFAULT_R 12288 + #define SGEMM_DEFAULT_R 12288 #define QGEMM_DEFAULT_R qgemm_r #define DGEMM_DEFAULT_R 12288 @@ -1073,10 +1093,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GETRF_FACTOR 0.72 -#define CGEMM3M_DEFAULT_UNROLL_N 4 -#define CGEMM3M_DEFAULT_UNROLL_M 8 -#define ZGEMM3M_DEFAULT_UNROLL_N 2 -#define ZGEMM3M_DEFAULT_UNROLL_M 8 #endif @@ -1157,6 +1173,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM3M_DEFAULT_UNROLL_N 2 #define ZGEMM3M_DEFAULT_UNROLL_M 8 +#define CGEMM3M_DEFAULT_P 448 +#define ZGEMM3M_DEFAULT_P 224 +#define XGEMM3M_DEFAULT_P 112 +#define CGEMM3M_DEFAULT_Q 224 +#define ZGEMM3M_DEFAULT_Q 224 +#define XGEMM3M_DEFAULT_Q 224 +#define CGEMM3M_DEFAULT_R 12288 +#define ZGEMM3M_DEFAULT_R 12288 +#define XGEMM3M_DEFAULT_R 12288 + + + #define GETRF_FACTOR 0.72 #endif @@ -1263,6 +1291,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM3M_DEFAULT_UNROLL_M 8 #define ZGEMM3M_DEFAULT_UNROLL_N 2 #define ZGEMM3M_DEFAULT_UNROLL_M 8 + + +#define CGEMM3M_DEFAULT_P 448 +#define ZGEMM3M_DEFAULT_P 224 +#define XGEMM3M_DEFAULT_P 112 +#define CGEMM3M_DEFAULT_Q 224 +#define ZGEMM3M_DEFAULT_Q 224 +#define XGEMM3M_DEFAULT_Q 224 +#define CGEMM3M_DEFAULT_R 12288 +#define ZGEMM3M_DEFAULT_R 12288 +#define XGEMM3M_DEFAULT_R 12288 + #endif |