summaryrefslogtreecommitdiff
path: root/driver
diff options
context:
space:
mode:
authorMartin Kroeker <martin@ruby.chemie.uni-freiburg.de>2019-01-05 16:55:33 +0100
committerGitHub <noreply@github.com>2019-01-05 16:55:33 +0100
commitae1d1f74f7ff96b8345189bcba058b7acdc7d494 (patch)
tree4457cd6b9c894c3394c45ffcea05e1dd928711f8 /driver
parent20d1aad13f59d6146bcdf8be6716cd8cc020d2bd (diff)
downloadopenblas-ae1d1f74f7ff96b8345189bcba058b7acdc7d494.tar.gz
openblas-ae1d1f74f7ff96b8345189bcba058b7acdc7d494.tar.bz2
openblas-ae1d1f74f7ff96b8345189bcba058b7acdc7d494.zip
Query AVX2 and AVX512 capability for runtime cpu selection
Diffstat (limited to 'driver')
-rw-r--r--driver/others/dynamic.c141
1 files changed, 102 insertions, 39 deletions
diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c
index 1f67dc521..7cc911d32 100644
--- a/driver/others/dynamic.c
+++ b/driver/others/dynamic.c
@@ -304,9 +304,47 @@ int support_avx(){
#endif
}
+/*
+ * Report whether the AVX2 kernels may be selected at runtime.
+ *
+ * Returns 1 when support_avx() reports success and CPUID leaf 7 has the
+ * AVX2 feature flag set (EBX bit 5); returns 0 otherwise.
+ * Always returns 0 when the library is built with NO_AVX2.
+ */
+int support_avx2(){
+#ifndef NO_AVX2
+  /* ecx is zeroed with the intent of querying sub-leaf 0.
+   * NOTE(review): whether cpuid() forwards *ecx as an input is not visible
+   * from here - confirm against its definition. */
+  int eax, ebx, ecx=0, edx;
+
+  /* Call the helper; testing the bare function name only checks its
+   * address, which is never null, and would disable this guard. */
+  if (!support_avx())
+    return 0;
+  cpuid(7, &eax, &ebx, &ecx, &edx);
+  /* CPUID.(EAX=7,ECX=0):EBX bit 5 is AVX2 (bit 7 is SMEP, not AVX2). */
+  return (ebx & (1<<5)) != 0;
+#else
+  return 0;
+#endif
+}
+
+/*
+ * Report whether the AVX512 kernels may be selected at runtime.
+ *
+ * Returns 1 only when support_avx() reports success and CPUID leaf 7 has
+ * both the AVX2 flag (EBX bit 5) and the AVX512VL flag (EBX bit 31) set;
+ * returns 0 otherwise. Always returns 0 when built with NO_AVX512.
+ *
+ * NOTE(review): this function itself only inspects CPUID feature flags;
+ * it does not check OS enablement of the AVX512 register state - confirm
+ * whether that is required by the callers.
+ */
+int support_avx512(){
+#ifndef NO_AVX512
+  /* ecx is zeroed with the intent of querying sub-leaf 0.
+   * NOTE(review): whether cpuid() forwards *ecx as an input is not visible
+   * from here - confirm against its definition. */
+  int eax, ebx, ecx=0, edx;
+
+  /* Call the helper; testing the bare function name only checks its
+   * address, which is never null, and would disable this guard. */
+  if (!support_avx())
+    return 0;
+  cpuid(7, &eax, &ebx, &ecx, &edx);
+  /* No AVX2 flag (EBX bit 5): do not even consider AVX512. */
+  if ((ebx & (1<<5)) == 0)
+    return 0;
+  /* AVX512VL is EBX bit 31; use unsigned arithmetic because shifting a
+   * signed 1 into the sign bit is undefined behaviour. */
+  return ((unsigned int)ebx & (1u<<31)) != 0;
+#else
+  return 0;
+#endif
+}
+
extern void openblas_warning(int verbose, const char * msg);
#define FALLBACK_VERBOSE 1
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
+#define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
+#define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512 instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
static int get_vendor(void){
@@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){
}
//Intel Haswell
if (model == 12 || model == 15) {
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Broadwell
if (model == 13) {
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
@@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){
case 4:
//Intel Haswell
if (model == 5 || model == 6) {
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Broadwell
if (model == 7 || model == 15) {
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Skylake
if (model == 14) {
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
@@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){
case 5:
//Intel Broadwell
if (model == 6) {
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
if (model == 5) {
// Intel Skylake X
-#ifndef NO_AVX512
- return &gotoblas_SKYLAKEX;
-#else
- if(support_avx())
+ if (support_avx512())
+ return &gotoblas_SKYLAKEX;
+ if(support_avx2())
return &gotoblas_HASWELL;
- else {
- openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
- return &gotoblas_NEHALEM;
- }
-#endif
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
+ openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
+ return &gotoblas_NEHALEM;
+ }
}
//Intel Skylake
if (model == 14) {
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
//Intel Phi Knights Landing
if (model == 7) {
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
@@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){
case 6:
if (model == 6) {
// Cannon Lake
-#ifndef NO_AVX512
- return &gotoblas_SKYLAKEX;
-#else
- if(support_avx())
-#ifndef NO_AVX2
- return &gotoblas_HASWELL;
-#else
- return &gotoblas_SANDYBRIDGE;
-#endif
- else
- return &gotoblas_NEHALEM;
-#endif
+ if(support_avx2())
+ return &gotoblas_HASWELL;
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
+ openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
+ return &gotoblas_NEHALEM;
+ }
}
return NULL;
case 9:
case 8:
if (model == 14 ) { // Kaby Lake
- if(support_avx())
+ if(support_avx2())
return &gotoblas_HASWELL;
- else{
+ if(support_avx()) {
+ openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
+ return &gotoblas_SANDYBRIDGE;
+ } else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}