summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> 2020-10-14 09:02:03 +0200
committer GitHub <noreply@github.com> 2020-10-14 09:02:03 +0200
commit 756802df61c08898a904a300431b41d60c46785c (patch)
tree 5d895b21bebad78c6b751248a00bc63d3d9dde2a
parent 01492decf460bbd23751ad5f4ccac4d62792def5 (diff)
parent 75e3a92df6b4100c05d034c85a6076678b5cc6af (diff)
download openblas-756802df61c08898a904a300431b41d60c46785c.tar.gz
openblas-756802df61c08898a904a300431b41d60c46785c.tar.bz2
openblas-756802df61c08898a904a300431b41d60c46785c.zip
Merge pull request #2890 from martin-frbg/s-d-sum
Revert special handling of Windows xNRM2 and enable C+intrinsics kern…
-rw-r--r-- Makefile.x86_64 6
-rw-r--r-- cmake/cc.cmake 17
-rw-r--r-- kernel/Makefile 7
-rw-r--r-- kernel/x86_64/KERNEL 11
4 files changed, 29 insertions, 12 deletions
diff --git a/Makefile.x86_64 b/Makefile.x86_64
index e793a1c2f..8a3fc4eae 100644
--- a/Makefile.x86_64
+++ b/Makefile.x86_64
@@ -12,6 +12,10 @@ ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
+ifdef HAVE_SSSE3
+CCOMMON_OPT += -mssse3
+FCOMMON_OPT += -mssse3
+endif
endif
endif
@@ -60,7 +64,7 @@ endif
endif
endif
-ifeq ($(CORE), HASWELL)
+ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
ifndef DYNAMIC_ARCH
ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC)
diff --git a/cmake/cc.cmake b/cmake/cc.cmake
index c490dd9ab..9f5cc1bf7 100644
--- a/cmake/cc.cmake
+++ b/cmake/cc.cmake
@@ -109,10 +109,25 @@ if (${CORE} STREQUAL "COOPERLAKE")
if (NOT NO_AVX512)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
- set (CCOMMON_OPT = "${CCOMMON_OPT} -march=cooperlake")
+ set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
endif()
endif ()
endif ()
endif ()
+
+if (NOT DYNAMIC_ARCH)
+ if (HAVE_AVX2)
+ set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2")
+ endif ()
+ if (HAVE_AVX)
+ set (CCOMMON_OPT "${CCOMMON_OPT} -mavx")
+ endif ()
+ if (HAVE_SSE3)
+ set (CCOMMON_OPT "${CCOMMON_OPT} -msse3")
+ endif ()
+ if (HAVE_SSSE3)
+ set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3")
+ endif ()
+endif()
diff --git a/kernel/Makefile b/kernel/Makefile
index 290fb2afe..c95c15f56 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,6 +8,9 @@ include $(TOPDIR)/Makefile.system
ifdef HAVE_SSE3
CFLAGS += -msse3
endif
+ifdef HAVE_SSSE3
+CFLAGS += -mssse3
+endif
ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
@@ -41,8 +44,8 @@ ifdef NO_AVX2
endif
ifdef TARGET_CORE
- ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
- override CFLAGS += -msse3
+ ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
+ override CFLAGS += -msse3 -mssse3
endif
ifeq ($(TARGET_CORE), COOPERLAKE)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL
index 4f110f0bf..855e1ff8c 100644
--- a/kernel/x86_64/KERNEL
+++ b/kernel/x86_64/KERNEL
@@ -259,12 +259,8 @@ SNRM2KERNEL = nrm2_sse.S
endif
ifndef DNRM2KERNEL
-ifeq ($(OSNAME),WINNT)
-DNRM2KERNEL = ../arm/nrm2.c
-else
DNRM2KERNEL = nrm2.S
endif
-endif
ifndef QNRM2KERNEL
QNRM2KERNEL = nrm2.S
@@ -275,12 +271,8 @@ CNRM2KERNEL = znrm2_sse.S
endif
ifndef ZNRM2KERNEL
-ifeq ($(OSNAME),WINNT)
-ZNRM2KERNEL = ../arm/znrm2.c
-else
ZNRM2KERNEL = znrm2.S
endif
-endif
ifndef XNRM2KERNEL
XNRM2KERNEL = znrm2.S
@@ -486,3 +478,6 @@ XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S
XGEMM3MKERNEL = xgemm3m_kernel_2x2.S
+
+SSUMKERNEL = ../arm/sum.c
+DSUMKERNEL = ../arm/sum.c