diff options
author | Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> | 2020-10-14 09:02:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-14 09:02:03 +0200 |
commit | 756802df61c08898a904a300431b41d60c46785c (patch) | |
tree | 5d895b21bebad78c6b751248a00bc63d3d9dde2a | |
parent | 01492decf460bbd23751ad5f4ccac4d62792def5 (diff) | |
parent | 75e3a92df6b4100c05d034c85a6076678b5cc6af (diff) | |
download | openblas-756802df61c08898a904a300431b41d60c46785c.tar.gz openblas-756802df61c08898a904a300431b41d60c46785c.tar.bz2 openblas-756802df61c08898a904a300431b41d60c46785c.zip |
Merge pull request #2890 from martin-frbg/s-d-sum
Revert special handling of Windows xNRM2 and enable C+intrinsics kern…
-rw-r--r-- | Makefile.x86_64 | 6 | ||||
-rw-r--r-- | cmake/cc.cmake | 17 | ||||
-rw-r--r-- | kernel/Makefile | 7 | ||||
-rw-r--r-- | kernel/x86_64/KERNEL | 11 |
4 files changed, 29 insertions, 12 deletions
diff --git a/Makefile.x86_64 b/Makefile.x86_64 index e793a1c2f..8a3fc4eae 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -12,6 +12,10 @@ ifdef HAVE_SSE3 ifndef DYNAMIC_ARCH CCOMMON_OPT += -msse3 FCOMMON_OPT += -msse3 +ifdef HAVE_SSSE3 +CCOMMON_OPT += -mssse3 +FCOMMON_OPT += -mssse3 +endif endif endif @@ -60,7 +64,7 @@ endif endif endif -ifeq ($(CORE), HASWELL) +ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE)) ifndef DYNAMIC_ARCH ifndef NO_AVX2 ifeq ($(C_COMPILER), GCC) diff --git a/cmake/cc.cmake b/cmake/cc.cmake index c490dd9ab..9f5cc1bf7 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -109,10 +109,25 @@ if (${CORE} STREQUAL "COOPERLAKE") if (NOT NO_AVX512) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) - set (CCOMMON_OPT = "${CCOMMON_OPT} -march=cooperlake") + set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake") else () set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") endif() endif () endif () endif () + +if (NOT DYNAMIC_ARCH) + if (HAVE_AVX2) + set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2") + endif () + if (HAVE_AVX) + set (CCOMMON_OPT "${CCOMMON_OPT} -mavx") + endif () + if (HAVE_SSE3) + set (CCOMMON_OPT "${CCOMMON_OPT} -msse3") + endif () + if (HAVE_SSSE3) + set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3") + endif () +endif() diff --git a/kernel/Makefile b/kernel/Makefile index 290fb2afe..c95c15f56 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,6 +8,9 @@ include $(TOPDIR)/Makefile.system ifdef HAVE_SSE3 CFLAGS += -msse3 endif +ifdef HAVE_SSSE3 +CFLAGS += -mssse3 +endif ifeq ($(C_COMPILER), GCC) GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) @@ -41,8 +44,8 @@ ifdef NO_AVX2 endif ifdef TARGET_CORE - ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) - override CFLAGS += -msse3 + ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) + override CFLAGS += -msse3 -mssse3 endif ifeq ($(TARGET_CORE), COOPERLAKE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 4f110f0bf..855e1ff8c 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -259,12 +259,8 @@ SNRM2KERNEL = nrm2_sse.S endif ifndef DNRM2KERNEL -ifeq ($(OSNAME),WINNT) -DNRM2KERNEL = ../arm/nrm2.c -else DNRM2KERNEL = nrm2.S endif -endif ifndef QNRM2KERNEL QNRM2KERNEL = nrm2.S @@ -275,12 +271,8 @@ CNRM2KERNEL = znrm2_sse.S endif ifndef ZNRM2KERNEL -ifeq ($(OSNAME),WINNT) -ZNRM2KERNEL = ../arm/znrm2.c -else ZNRM2KERNEL = znrm2.S endif -endif ifndef XNRM2KERNEL XNRM2KERNEL = znrm2.S @@ -486,3 +478,6 @@ XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S XGEMM3MKERNEL = xgemm3m_kernel_2x2.S + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c |