diff options
author | Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> | 2019-01-05 16:55:33 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-05 16:55:33 +0100 |
commit | ae1d1f74f7ff96b8345189bcba058b7acdc7d494 (patch) | |
tree | 4457cd6b9c894c3394c45ffcea05e1dd928711f8 /driver | |
parent | 20d1aad13f59d6146bcdf8be6716cd8cc020d2bd (diff) | |
download | openblas-ae1d1f74f7ff96b8345189bcba058b7acdc7d494.tar.gz openblas-ae1d1f74f7ff96b8345189bcba058b7acdc7d494.tar.bz2 openblas-ae1d1f74f7ff96b8345189bcba058b7acdc7d494.zip |
Query AVX2 and AVX512 capability for runtime cpu selection
Diffstat (limited to 'driver')
-rw-r--r-- | driver/others/dynamic.c | 141 |
1 file changed, 102 insertions, 39 deletions
diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c
index 1f67dc521..7cc911d32 100644
--- a/driver/others/dynamic.c
+++ b/driver/others/dynamic.c
@@ -304,9 +304,47 @@ int support_avx(){
 #endif
 }
 
+int support_avx2(){  // returns 1 when support_avx() succeeds and CPUID reports AVX2, otherwise 0
+#ifndef NO_AVX2
+  int eax, ebx, ecx=0, edx;
+  int ret=0;
+
+  if (!support_avx())  // must be a call: a bare function name is never null, so the guard would never fire
+    return 0;
+  cpuid(7, &eax, &ebx, &ecx, &edx);
+  if((ebx & (1<<5)) != 0)  // CPUID.(EAX=7,ECX=0):EBX bit 5 is the AVX2 flag (bit 7 is SMEP)
+    ret=1;  //CPU reports AVX2
+  return ret;
+#else
+  return 0;
+#endif
+}
+
+int support_avx512(){  // returns 1 when support_avx() succeeds and CPUID reports both AVX2 and AVX512VL, otherwise 0
+#ifndef NO_AVX512
+  int eax, ebx, ecx=0, edx;
+  int ret=0;
+
+  if (!support_avx())  // must be a call, see support_avx2()
+    return 0;
+  cpuid(7, &eax, &ebx, &ecx, &edx);  // NOTE(review): only CPUID flags are tested in this function; OS-enabled AVX-512 register state is not queried here
+  if((ebx & (1<<5)) == 0){
+    return 0;  //no AVX2 flag, so do not report AVX512 either
+  }
+  if((ebx & (1u<<31)) != 0){  // unsigned constant: shifting a signed 1 into bit 31 is undefined behaviour
+    ret=1;  //CPU reports AVX512VL
+  }
+  return ret;
+#else
+  return 0;
+#endif
+}
+
 extern void openblas_warning(int verbose, const char * msg);
 #define FALLBACK_VERBOSE 1
 #define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
+#define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
+#define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512 instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
 #define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. 
OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n" static int get_vendor(void){ @@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){ } //Intel Haswell if (model == 12 || model == 15) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } //Intel Broadwell if (model == 13) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } @@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){ case 4: //Intel Haswell if (model == 5 || model == 6) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } //Intel Broadwell if (model == 7 || model == 15) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. 
} } //Intel Skylake if (model == 14) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } @@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){ case 5: //Intel Broadwell if (model == 6) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } if (model == 5) { // Intel Skylake X -#ifndef NO_AVX512 - return &gotoblas_SKYLAKEX; -#else - if(support_avx()) + if (support_avx512()) + return &gotoblas_SKYLAKEX; + if(support_avx2()) return &gotoblas_HASWELL; - else { - openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); - return &gotoblas_NEHALEM; - } -#endif + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; + } } //Intel Skylake if (model == 14) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. 
} } //Intel Phi Knights Landing if (model == 7) { - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } @@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){ case 6: if (model == 6) { // Cannon Lake -#ifndef NO_AVX512 - return &gotoblas_SKYLAKEX; -#else - if(support_avx()) -#ifndef NO_AVX2 - return &gotoblas_HASWELL; -#else - return &gotoblas_SANDYBRIDGE; -#endif - else - return &gotoblas_NEHALEM; -#endif + if(support_avx2()) + return &gotoblas_HASWELL; + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; + } } return NULL; case 9: case 8: if (model == 14 ) { // Kaby Lake - if(support_avx()) + if(support_avx2()) return &gotoblas_HASWELL; - else{ + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } |