summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: wernsaar <wernsaar@googlemail.com> 2014-09-20 14:27:10 +0200
committer: wernsaar <wernsaar@googlemail.com> 2014-09-20 14:27:10 +0200
commit: 7aae4a62e78daa586774248aa31679311d7bd5cc (patch)
tree: 8d6cb41e5199b789819f47c173b11ee4a6e4a19f
parent: 7a911569b8502d97075bb63ebcbfed18bc13bc97 (diff)
download: openblas-7aae4a62e78daa586774248aa31679311d7bd5cc.tar.gz
openblas-7aae4a62e78daa586774248aa31679311d7bd5cc.tar.bz2
openblas-7aae4a62e78daa586774248aa31679311d7bd5cc.zip
enabled use of GEMM3M functions
-rw-r--r-- common_param.h 109
-rw-r--r-- driver/level3/Makefile 4
-rw-r--r-- exports/gensymbol 4
-rw-r--r-- interface/Makefile 4
-rw-r--r-- kernel/setparam-ref.c 90
-rw-r--r-- param.h 48
6 files changed, 235 insertions, 24 deletions
diff --git a/common_param.h b/common_param.h
index 1c362e8cb..e9f35c033 100644
--- a/common_param.h
+++ b/common_param.h
@@ -435,6 +435,9 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*chemm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
int (*chemm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
+ int cgemm3m_p, cgemm3m_q, cgemm3m_r;
+ int cgemm3m_unroll_m, cgemm3m_unroll_n, cgemm3m_unroll_mn;
+
int (*cgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int (*cgemm3m_incopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
@@ -595,6 +598,9 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
int (*zhemm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
int (*zhemm_oltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
+ int zgemm3m_p, zgemm3m_q, zgemm3m_r;
+ int zgemm3m_unroll_m, zgemm3m_unroll_n, zgemm3m_unroll_mn;
+
int (*zgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
int (*zgemm3m_incopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
@@ -757,6 +763,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*xhemm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
int (*xhemm_oltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
+ int xgemm3m_p, xgemm3m_q, xgemm3m_r;
+ int xgemm3m_unroll_m, xgemm3m_unroll_n, xgemm3m_unroll_mn;
+
int (*xgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
int (*xgemm3m_incopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
@@ -900,6 +909,27 @@ extern gotoblas_t *gotoblas;
#define XGEMM_UNROLL_N gotoblas -> xgemm_unroll_n
#define XGEMM_UNROLL_MN gotoblas -> xgemm_unroll_mn
+#define CGEMM3M_P gotoblas -> cgemm3m_p
+#define CGEMM3M_Q gotoblas -> cgemm3m_q
+#define CGEMM3M_R gotoblas -> cgemm3m_r
+#define CGEMM3M_UNROLL_M gotoblas -> cgemm3m_unroll_m
+#define CGEMM3M_UNROLL_N gotoblas -> cgemm3m_unroll_n
+#define CGEMM3M_UNROLL_MN gotoblas -> cgemm3m_unroll_mn
+
+#define ZGEMM3M_P gotoblas -> zgemm3m_p
+#define ZGEMM3M_Q gotoblas -> zgemm3m_q
+#define ZGEMM3M_R gotoblas -> zgemm3m_r
+#define ZGEMM3M_UNROLL_M gotoblas -> zgemm3m_unroll_m
+#define ZGEMM3M_UNROLL_N gotoblas -> zgemm3m_unroll_n
+#define ZGEMM3M_UNROLL_MN gotoblas -> zgemm3m_unroll_mn
+
+#define XGEMM3M_P gotoblas -> xgemm3m_p
+#define XGEMM3M_Q gotoblas -> xgemm3m_q
+#define XGEMM3M_R gotoblas -> xgemm3m_r
+#define XGEMM3M_UNROLL_M gotoblas -> xgemm3m_unroll_m
+#define XGEMM3M_UNROLL_N gotoblas -> xgemm3m_unroll_n
+#define XGEMM3M_UNROLL_MN gotoblas -> xgemm3m_unroll_mn
+
#else
#define DTB_ENTRIES DTB_DEFAULT_ENTRIES
@@ -972,6 +1002,55 @@ extern gotoblas_t *gotoblas;
#define XGEMM_UNROLL_N XGEMM_DEFAULT_UNROLL_N
#define XGEMM_UNROLL_MN MAX((XGEMM_UNROLL_M), (XGEMM_UNROLL_N))
+#ifdef CGEMM_DEFAULT_UNROLL_N
+
+#define CGEMM3M_P CGEMM3M_DEFAULT_P
+#define CGEMM3M_Q CGEMM3M_DEFAULT_Q
+#define CGEMM3M_R CGEMM3M_DEFAULT_R
+#define CGEMM3M_UNROLL_M CGEMM3M_DEFAULT_UNROLL_M
+#define CGEMM3M_UNROLL_N CGEMM3M_DEFAULT_UNROLL_N
+#define CGEMM3M_UNROLL_MN MAX((CGEMM3M_UNROLL_M), (CGEMM3M_UNROLL_N))
+
+#else
+
+#define CGEMM3M_P SGEMM_DEFAULT_P
+#define CGEMM3M_Q SGEMM_DEFAULT_Q
+#define CGEMM3M_R SGEMM_DEFAULT_R
+#define CGEMM3M_UNROLL_M SGEMM_DEFAULT_UNROLL_M
+#define CGEMM3M_UNROLL_N SGEMM_DEFAULT_UNROLL_N
+#define CGEMM3M_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N))
+
+#endif
+
+
+#ifdef ZGEMM_DEFAULT_UNROLL_N
+
+#define ZGEMM3M_P ZGEMM3M_DEFAULT_P
+#define ZGEMM3M_Q ZGEMM3M_DEFAULT_Q
+#define ZGEMM3M_R ZGEMM3M_DEFAULT_R
+#define ZGEMM3M_UNROLL_M ZGEMM3M_DEFAULT_UNROLL_M
+#define ZGEMM3M_UNROLL_N ZGEMM3M_DEFAULT_UNROLL_N
+#define ZGEMM3M_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
+
+#else
+
+#define ZGEMM3M_P DGEMM_DEFAULT_P
+#define ZGEMM3M_Q DGEMM_DEFAULT_Q
+#define ZGEMM3M_R DGEMM_DEFAULT_R
+#define ZGEMM3M_UNROLL_M DGEMM_DEFAULT_UNROLL_M
+#define ZGEMM3M_UNROLL_N DGEMM_DEFAULT_UNROLL_N
+#define ZGEMM3M_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
+
+#endif
+
+#define XGEMM3M_P QGEMM_DEFAULT_P
+#define XGEMM3M_Q QGEMM_DEFAULT_Q
+#define XGEMM3M_R QGEMM_DEFAULT_R
+#define XGEMM3M_UNROLL_M QGEMM_DEFAULT_UNROLL_M
+#define XGEMM3M_UNROLL_N QGEMM_DEFAULT_UNROLL_N
+#define XGEMM3M_UNROLL_MN MAX((QGEMM_UNROLL_M), (QGEMM_UNROLL_N))
+
+
#endif
#endif
@@ -1054,14 +1133,14 @@ extern gotoblas_t *gotoblas;
#endif
#ifdef XDOUBLE
-#define GEMM3M_UNROLL_M QGEMM_UNROLL_M
-#define GEMM3M_UNROLL_N QGEMM_UNROLL_N
+#define GEMM3M_UNROLL_M XGEMM3M_UNROLL_M
+#define GEMM3M_UNROLL_N XGEMM3M_UNROLL_N
#elif defined(DOUBLE)
-#define GEMM3M_UNROLL_M DGEMM_UNROLL_M
-#define GEMM3M_UNROLL_N DGEMM_UNROLL_N
+#define GEMM3M_UNROLL_M ZGEMM3M_UNROLL_M
+#define GEMM3M_UNROLL_N ZGEMM3M_UNROLL_N
#else
-#define GEMM3M_UNROLL_M SGEMM_UNROLL_M
-#define GEMM3M_UNROLL_N SGEMM_UNROLL_N
+#define GEMM3M_UNROLL_M CGEMM3M_UNROLL_M
+#define GEMM3M_UNROLL_N CGEMM3M_UNROLL_N
#endif
@@ -1123,31 +1202,31 @@ extern gotoblas_t *gotoblas;
#ifndef GEMM3M_P
#ifdef XDOUBLE
-#define GEMM3M_P QGEMM_P
+#define GEMM3M_P XGEMM3M_P
#elif defined(DOUBLE)
-#define GEMM3M_P DGEMM_P
+#define GEMM3M_P ZGEMM3M_P
#else
-#define GEMM3M_P SGEMM_P
+#define GEMM3M_P CGEMM3M_P
#endif
#endif
#ifndef GEMM3M_Q
#ifdef XDOUBLE
-#define GEMM3M_Q QGEMM_Q
+#define GEMM3M_Q XGEMM3M_Q
#elif defined(DOUBLE)
-#define GEMM3M_Q DGEMM_Q
+#define GEMM3M_Q ZGEMM3M_Q
#else
-#define GEMM3M_Q SGEMM_Q
+#define GEMM3M_Q CGEMM3M_Q
#endif
#endif
#ifndef GEMM3M_R
#ifdef XDOUBLE
-#define GEMM3M_R QGEMM_R
+#define GEMM3M_R XGEMM3M_R
#elif defined(DOUBLE)
-#define GEMM3M_R DGEMM_R
+#define GEMM3M_R ZGEMM3M_R
#else
-#define GEMM3M_R SGEMM_R
+#define GEMM3M_R CGEMM3M_R
#endif
#endif
diff --git a/driver/level3/Makefile b/driver/level3/Makefile
index d62921e84..352225206 100644
--- a/driver/level3/Makefile
+++ b/driver/level3/Makefile
@@ -4,11 +4,11 @@ include ../../Makefile.system
USE_GEMM3M = 0
ifeq ($(ARCH), x86)
-USE_GEMM3M = 0
+USE_GEMM3M = 1
endif
ifeq ($(ARCH), x86_64)
-USE_GEMM3M = 0
+USE_GEMM3M = 1
endif
ifeq ($(ARCH), ia64)
diff --git a/exports/gensymbol b/exports/gensymbol
index e5049678a..69454d71b 100644
--- a/exports/gensymbol
+++ b/exports/gensymbol
@@ -75,7 +75,9 @@
);
@gemm3mobjs = (
-
+ cgemm3m,zgemm3m,
+ chemm3m,zhemm3m,
+ csymm3m,zsymm3m
);
diff --git a/interface/Makefile b/interface/Makefile
index cced14fb2..567224119 100644
--- a/interface/Makefile
+++ b/interface/Makefile
@@ -4,11 +4,11 @@ include $(TOPDIR)/Makefile.system
SUPPORT_GEMM3M = 0
ifeq ($(ARCH), x86)
-SUPPORT_GEMM3M = 0
+SUPPORT_GEMM3M = 1
endif
ifeq ($(ARCH), x86_64)
-SUPPORT_GEMM3M = 0
+SUPPORT_GEMM3M = 1
endif
ifeq ($(ARCH), ia64)
diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index b1beeae5c..0d7bbd4ac 100644
--- a/kernel/setparam-ref.c
+++ b/kernel/setparam-ref.c
@@ -293,6 +293,14 @@ gotoblas_t TABLE_NAME = {
#endif
chemm_outcopyTS, chemm_oltcopyTS,
+ 0, 0, 0,
+#ifdef CGEMM3M_DEFAULT_UNROLL_M
+ CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
+#else
+ SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
+#endif
+
+
cgemm3m_kernelTS,
cgemm3m_incopybTS, cgemm3m_incopyrTS,
@@ -391,6 +399,14 @@ gotoblas_t TABLE_NAME = {
#endif
zhemm_outcopyTS, zhemm_oltcopyTS,
+ 0, 0, 0,
+#ifdef ZGEMM3M_DEFAULT_UNROLL_M
+ ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
+#else
+ DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
+#endif
+
+
zgemm3m_kernelTS,
zgemm3m_incopybTS, zgemm3m_incopyrTS,
@@ -486,6 +502,9 @@ gotoblas_t TABLE_NAME = {
#endif
xhemm_outcopyTS, xhemm_oltcopyTS,
+ 0, 0, 0,
+ QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
+
xgemm3m_kernelTS,
xgemm3m_incopybTS, xgemm3m_incopyrTS,
@@ -661,9 +680,23 @@ static void init_parameter(void) {
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
+
+#ifdef CGEMM3M_DEFAULT_Q
+ TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
+#else
+ TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
+#endif
+
+#ifdef ZGEMM3M_DEFAULT_Q
+ TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
+#else
+ TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
+#endif
+
#ifdef EXPRECISION
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
+ TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
#endif
#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
@@ -918,20 +951,56 @@ static void init_parameter(void) {
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
+
+
+
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
+
+#endif
+
+
+#ifdef CGEMM3M_DEFAULT_P
+ TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
+#else
+ TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
+#endif
+
+#ifdef ZGEMM3M_DEFAULT_P
+ TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
+#else
+ TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
+#endif
+
+#ifdef EXPRECISION
+ TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
#endif
+
TABLE_NAME.sgemm_p = (TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.dgemm_p = (TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.cgemm_p = (TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1) & ~(CGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.zgemm_p = (TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1) & ~(ZGEMM_DEFAULT_UNROLL_M - 1);
+
+#ifdef CGEMM3M_DEFAULT_UNROLL_M
+ TABLE_NAME.cgemm3m_p = (TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1) & ~(CGEMM3M_DEFAULT_UNROLL_M - 1);
+#else
+ TABLE_NAME.cgemm3m_p = (TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1);
+#endif
+
+#ifdef ZGEMM3M_DEFAULT_UNROLL_M
+ TABLE_NAME.zgemm3m_p = (TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1) & ~(ZGEMM3M_DEFAULT_UNROLL_M - 1);
+#else
+ TABLE_NAME.zgemm3m_p = (TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1);
+#endif
+
#ifdef QUAD_PRECISION
TABLE_NAME.qgemm_p = (TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1);
TABLE_NAME.xgemm_p = (TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1) & ~(XGEMM_DEFAULT_UNROLL_M - 1);
+ TABLE_NAME.xgemm3m_p = (TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1);
#endif
#ifdef DEBUG
@@ -965,11 +1034,32 @@ static void init_parameter(void) {
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
+ TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
+ ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
+ + TABLE_NAME.align) & ~TABLE_NAME.align)
+ ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
+
+ TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
+ ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
+ + TABLE_NAME.align) & ~TABLE_NAME.align)
+ ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
+
+
+
+
#ifdef EXPRECISION
TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
+
+ TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
+ ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
+ + TABLE_NAME.align) & ~TABLE_NAME.align)
+ ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
+
#endif
+
+
}
diff --git a/param.h b/param.h
index 82f4ad842..4adb0a1de 100644
--- a/param.h
+++ b/param.h
@@ -289,6 +289,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_P 448
+#define ZGEMM3M_DEFAULT_P 224
+#define XGEMM3M_DEFAULT_P 112
+#define CGEMM3M_DEFAULT_Q 224
+#define ZGEMM3M_DEFAULT_Q 224
+#define XGEMM3M_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_R 12288
+#define ZGEMM3M_DEFAULT_R 12288
+#define XGEMM3M_DEFAULT_R 12288
+
#define SGEMM_DEFAULT_R sgemm_r
#define QGEMM_DEFAULT_R qgemm_r
#define DGEMM_DEFAULT_R dgemm_r
@@ -371,6 +381,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define QGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_P 448
+#define ZGEMM3M_DEFAULT_P 224
+#define XGEMM3M_DEFAULT_P 112
+#define CGEMM3M_DEFAULT_Q 224
+#define ZGEMM3M_DEFAULT_Q 224
+#define XGEMM3M_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_R 12288
+#define ZGEMM3M_DEFAULT_R 12288
+#define XGEMM3M_DEFAULT_R 12288
+
#define SGEMM_DEFAULT_R 12288
#define QGEMM_DEFAULT_R qgemm_r
#define DGEMM_DEFAULT_R 12288
@@ -1073,10 +1093,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GETRF_FACTOR 0.72
-#define CGEMM3M_DEFAULT_UNROLL_N 4
-#define CGEMM3M_DEFAULT_UNROLL_M 8
-#define ZGEMM3M_DEFAULT_UNROLL_N 2
-#define ZGEMM3M_DEFAULT_UNROLL_M 8
#endif
@@ -1157,6 +1173,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM3M_DEFAULT_UNROLL_N 2
#define ZGEMM3M_DEFAULT_UNROLL_M 8
+#define CGEMM3M_DEFAULT_P 448
+#define ZGEMM3M_DEFAULT_P 224
+#define XGEMM3M_DEFAULT_P 112
+#define CGEMM3M_DEFAULT_Q 224
+#define ZGEMM3M_DEFAULT_Q 224
+#define XGEMM3M_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_R 12288
+#define ZGEMM3M_DEFAULT_R 12288
+#define XGEMM3M_DEFAULT_R 12288
+
+
+
#define GETRF_FACTOR 0.72
#endif
@@ -1263,6 +1291,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 2
#define ZGEMM3M_DEFAULT_UNROLL_M 8
+
+
+#define CGEMM3M_DEFAULT_P 448
+#define ZGEMM3M_DEFAULT_P 224
+#define XGEMM3M_DEFAULT_P 112
+#define CGEMM3M_DEFAULT_Q 224
+#define ZGEMM3M_DEFAULT_Q 224
+#define XGEMM3M_DEFAULT_Q 224
+#define CGEMM3M_DEFAULT_R 12288
+#define ZGEMM3M_DEFAULT_R 12288
+#define XGEMM3M_DEFAULT_R 12288
+
#endif