summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhang Xianyi <traits.zhang@gmail.com>2013-08-01 23:53:12 +0800
committerZhang Xianyi <traits.zhang@gmail.com>2013-08-01 23:53:12 +0800
commit835293cc1afde82544b2d24780213aabe82ad788 (patch)
tree5b1c3b007251840dffe55fee3137ea395c7dbf00
parentf26b7a08aa6e3fa2a060fc902a334141e22f02d4 (diff)
parentb736aa811089fddf586188447f4e25c77df3b103 (diff)
downloadopenblas-0.2.8.tar.gz
openblas-0.2.8.tar.bz2
openblas-0.2.8.zip
Merge branch 'hotfix-v0.2.8'v0.2.8
-rw-r--r--CONTRIBUTORS.md4
-rw-r--r--Changelog.txt12
-rw-r--r--Makefile.generic5
-rw-r--r--Makefile.power11
-rw-r--r--Makefile.rule2
-rw-r--r--Makefile.sparc1
-rw-r--r--Makefile.system7
-rw-r--r--Makefile.x863
-rw-r--r--Makefile.x86_6413
-rw-r--r--common_alpha.h10
-rw-r--r--common_ia64.h9
-rw-r--r--common_mips64.h9
-rw-r--r--common_sparc.h11
-rw-r--r--common_x86.h4
-rw-r--r--common_x86_64.h5
-rw-r--r--cpuid.h8
-rw-r--r--cpuid_x86.c2
-rw-r--r--ctest/Makefile2
-rw-r--r--driver/others/dynamic.c8
-rw-r--r--driver/others/init.c3
-rw-r--r--exports/Makefile40
-rw-r--r--exports/gensymbol69
-rw-r--r--f_check21
-rw-r--r--ftest3.f6
-rw-r--r--getarch.c6
-rw-r--r--getarch_2nd.c2
-rw-r--r--interface/trtri.c7
-rw-r--r--lapack/trtri/dtrtri_lapack.f2
-rw-r--r--test/Makefile2
29 files changed, 202 insertions, 82 deletions
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 4a13bcc2c..59df77609 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -79,5 +79,9 @@ In chronological order:
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
model is used by OpenBLAS.
+* Sébastien Fabbro <bicatali@gentoo.org>
+ * [2013-07-24] Modify makefile to respect user's LDFLAGS
+ * [2013-07-24] Add stack markings for GNU as arch-independent for assembler files
+
* [Your name or handle] <[email or website]>
* [Date] [Brief summary of your changes]
diff --git a/Changelog.txt b/Changelog.txt
index 574f46226..dd186b683 100644
--- a/Changelog.txt
+++ b/Changelog.txt
@@ -1,5 +1,17 @@
OpenBLAS ChangeLog
====================================================================
+Version 0.2.8
+01-Aug-2013
+common:
+ * Support Open64 5.0. (#266)
+ * Add executable stack markings. (#262, Thanks to Sébastien Fabbro)
+ * Respect user's LDFLAGS (Thanks to Sébastien Fabbro)
+
+x86/x86-64:
+ * Rollback bulldozer and piledriver kernels to barcelona kernels (#263)
+ We will fix the computational error bug in bulldozer and piledriver kernels.
+
+====================================================================
Version 0.2.7
20-Jul-2013
common:
diff --git a/Makefile.generic b/Makefile.generic
index 770aaf850..a5e50b1f0 100644
--- a/Makefile.generic
+++ b/Makefile.generic
@@ -1,6 +1 @@
COPT = -Wall -O2 # -DGEMMTEST
-ifdef BINARY64
-else
-# LDFLAGS = -m elf32ppc
-LDFLAGS = -m elf_i386
-endif
diff --git a/Makefile.power b/Makefile.power
index 35eb2cb7b..c6d6aeb50 100644
--- a/Makefile.power
+++ b/Makefile.power
@@ -17,13 +17,7 @@ endif
endif
ifdef BINARY64
-ifeq ($(OSNAME), Linux)
-LDFLAGS = -m elf64ppc
-endif
-ifeq ($(OSNAME), Darwin)
-LDFLAGS = -arch ppc64
-endif
ifeq ($(OSNAME), AIX)
CCOMMON_OPT += -mpowerpc64 -maix64
@@ -34,17 +28,12 @@ ifeq ($(COMPILER_F77), xlf)
FCOMMON_OPT += -q64
endif
ARFLAGS = -X 64
-LDFLAGS = -b64
ASFLAGS = -a64
endif
else
-ifeq ($(OSNAME), Linux)
-LDFLAGS = -m elf32ppc
-endif
ifeq ($(OSNAME), AIX)
CCOMMON_OPT += -Wa,-a32
ARFLAGS = -X 32
-LDFLAGS = -b32
ASFLAGS = -a32
endif
endif
diff --git a/Makefile.rule b/Makefile.rule
index fb377c377..a92eb500a 100644
--- a/Makefile.rule
+++ b/Makefile.rule
@@ -3,7 +3,7 @@
#
# This library's version
-VERSION = 0.2.7
+VERSION = 0.2.8
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
diff --git a/Makefile.sparc b/Makefile.sparc
index c2b878e73..c58c77e1a 100644
--- a/Makefile.sparc
+++ b/Makefile.sparc
@@ -10,7 +10,6 @@ endif
ifeq ($(COMPILER_F77), f90)
FCOMMON_OPT += -xarch=v9
endif
-LDFLAGS = -64
else
CCOMMON_OPT += -mcpu=v9
diff --git a/Makefile.system b/Makefile.system
index b1f9ba514..727b08960 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -324,14 +324,16 @@ ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
-DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
+DYNAMIC_CORE += SANDYBRIDGE
+#BULLDOZER PILEDRIVER
endif
endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
-DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
+DYNAMIC_CORE += SANDYBRIDGE
+#BULLDOZER PILEDRIVER
endif
endif
@@ -895,6 +897,7 @@ export CC
export FC
export BU
export FU
+export NEED2UNDERSCORES
export USE_THREAD
export NUM_THREADS
export NUM_CORES
diff --git a/Makefile.x86 b/Makefile.x86
index 94ca7c4a7..cd7cc9f90 100644
--- a/Makefile.x86
+++ b/Makefile.x86
@@ -1,8 +1,5 @@
# COMPILER_PREFIX = mingw32-
-ifeq ($(OSNAME), Linux)
-LDFLAGS = -melf_i386
-endif
ifeq ($(OSNAME), Interix)
ARFLAGS = -m x86
diff --git a/Makefile.x86_64 b/Makefile.x86_64
index b939e5459..c8d4b237b 100644
--- a/Makefile.x86_64
+++ b/Makefile.x86_64
@@ -2,25 +2,12 @@
ifeq ($(OSNAME), SunOS)
ifdef BINARY64
-LDFLAGS = -64
ifeq ($(F_COMPILER), SUN)
FCOMMON_OPT += -m64
endif
endif
endif
-ifeq ($(OSNAME), FreeBSD)
-LDFLAGS = -m elf_x86_64_fbsd
-endif
-
-ifeq ($(OSNAME), Linux)
-LDFLAGS = -m elf_x86_64
-endif
-
-ifeq ($(OSNAME), Darwin)
-LDFLAGS =
-endif
-
ifeq ($(OSNAME), Interix)
ARFLAGS = -m x64
endif
diff --git a/common_alpha.h b/common_alpha.h
index cf794739c..3b46c74ce 100644
--- a/common_alpha.h
+++ b/common_alpha.h
@@ -150,9 +150,17 @@ REALNAME:
#define PROFCODE .prologue 0
#endif
+#if defined(__linux__) && defined(__ELF__)
+#define GNUSTACK .section .note.GNU-stack,"",%progbits
+#else
+#define GNUSTACK
+#endif
+
#define EPILOGUE \
.end REALNAME; \
- .ident VERSION
+ .ident VERSION; \
+ GNUSTACK
+
#endif
#ifdef DOUBLE
diff --git a/common_ia64.h b/common_ia64.h
index 81939cc1b..3530e0b1d 100644
--- a/common_ia64.h
+++ b/common_ia64.h
@@ -379,8 +379,15 @@ REALNAME:
#define PROFCODE
#endif
+#if defined(__linux__) && defined(__ELF__)
+#define GNUSTACK .section .note.GNU-stack,"",%progbits
+#else
+#define GNUSTACK
+#endif
+
#define EPILOGUE \
- .endp REALNAME
+ .endp REALNAME ; \
+ GNUSTACK
#define START_ADDRESS 0x20000fc800000000UL
diff --git a/common_mips64.h b/common_mips64.h
index 1bee69407..c08fb2c75 100644
--- a/common_mips64.h
+++ b/common_mips64.h
@@ -235,10 +235,17 @@ REALNAME: ;\
.set noreorder ;\
.set nomacro
+#if defined(__linux__) && defined(__ELF__)
+#define GNUSTACK .section .note.GNU-stack,"",%progbits
+#else
+#define GNUSTACK
+#endif
+
#define EPILOGUE \
.set macro ;\
.set reorder ;\
- .end REALNAME
+ .end REALNAME ;\
+ GNUSTACK
#define PROFCODE
#endif
diff --git a/common_sparc.h b/common_sparc.h
index 35d8bdb5f..cfd27f768 100644
--- a/common_sparc.h
+++ b/common_sparc.h
@@ -199,8 +199,17 @@ static __inline int blas_quickdivide(blasint x, blasint y){
.type REALNAME, #function; \
.proc 07; \
REALNAME:;
+
+#if defined(__linux__) && defined(__ELF__)
+#define GNUSTACK .section .note.GNU-stack,"",%progbits
+#else
+#define GNUSTACK
+#endif
+
#define EPILOGUE \
- .size REALNAME, .-REALNAME
+ .size REALNAME, .-REALNAME; \
+ GNUSTACK
+
#endif
#endif
diff --git a/common_x86.h b/common_x86.h
index 5f56839f8..48517d900 100644
--- a/common_x86.h
+++ b/common_x86.h
@@ -301,7 +301,9 @@ REALNAME:
#define PROFCODE
#endif
-#define EPILOGUE .size REALNAME, .-REALNAME
+#define EPILOGUE \
+ .size REALNAME, .-REALNAME; \
+ .section .note.GNU-stack,"",%progbits
#endif
diff --git a/common_x86_64.h b/common_x86_64.h
index 8f9f73680..188903848 100644
--- a/common_x86_64.h
+++ b/common_x86_64.h
@@ -372,7 +372,10 @@ REALNAME:
#define PROFCODE
#endif
-#define EPILOGUE .size REALNAME, .-REALNAME
+#define EPILOGUE \
+ .size REALNAME, .-REALNAME; \
+ .section .note.GNU-stack,"",%progbits
+
#endif
diff --git a/cpuid.h b/cpuid.h
index 2cbbd4539..4311ce95e 100644
--- a/cpuid.h
+++ b/cpuid.h
@@ -105,8 +105,8 @@
#define CORE_NANO 19
#define CORE_SANDYBRIDGE 20
#define CORE_BOBCAT 21
-#define CORE_BULLDOZER 22
-#define CORE_PILEDRIVER 23
+#define CORE_BULLDOZER CORE_BARCELONA
+#define CORE_PILEDRIVER CORE_BARCELONA
#define CORE_HASWELL CORE_SANDYBRIDGE
#define HAVE_SSE (1 << 0)
@@ -198,8 +198,8 @@ typedef struct {
#define CPUTYPE_NANO 43
#define CPUTYPE_SANDYBRIDGE 44
#define CPUTYPE_BOBCAT 45
-#define CPUTYPE_BULLDOZER 46
-#define CPUTYPE_PILEDRIVER 47
+#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
+#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
// this define is because BLAS doesn't have haswell specific optimizations yet
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE
diff --git a/cpuid_x86.c b/cpuid_x86.c
index 98af9d041..9e850a2aa 100644
--- a/cpuid_x86.c
+++ b/cpuid_x86.c
@@ -229,8 +229,8 @@ int get_cputype(int gettype){
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
#ifndef NO_AVX
if (support_avx()) feature |= HAVE_AVX;
+ if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
#endif
- if ((ecx & (1 << 20)) != 0) feature |= HAVE_FMA3;
if (have_excpuid() >= 0x01) {
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
diff --git a/ctest/Makefile b/ctest/Makefile
index b1295640f..099116895 100644
--- a/ctest/Makefile
+++ b/ctest/Makefile
@@ -77,7 +77,7 @@ endif
clean ::
rm -f x*
-FLDFLAGS = $(FFLAGS:-fPIC=)
+FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
CEXTRALIB =
# Single real
diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c
index 197cc2b2d..bf60efb19 100644
--- a/driver/others/dynamic.c
+++ b/driver/others/dynamic.c
@@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA;
extern gotoblas_t gotoblas_BOBCAT;
#ifndef NO_AVX
extern gotoblas_t gotoblas_SANDYBRIDGE;
-extern gotoblas_t gotoblas_BULLDOZER;
-extern gotoblas_t gotoblas_PILEDRIVER;
+//extern gotoblas_t gotoblas_BULLDOZER;
+//extern gotoblas_t gotoblas_PILEDRIVER;
#else
//Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
+#endif
+
#define gotoblas_BULLDOZER gotoblas_BARCELONA
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
-#endif
+
//Use sandy bridge kernels for haswell.
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
diff --git a/driver/others/init.c b/driver/others/init.c
index 4efc2816a..5da71cec1 100644
--- a/driver/others/init.c
+++ b/driver/others/init.c
@@ -83,6 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <dirent.h>
#include <dlfcn.h>
#include <unistd.h>
+#include <string.h>
#define MAX_NODES 16
#define MAX_CPUS 256
@@ -315,7 +316,7 @@ static int numa_check(void) {
}
while ((dir = readdir(dp)) != NULL) {
- if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
+ if (strncmp(dir->d_name, "node", 4)==0) {
node = atoi(&dir -> d_name[4]);
diff --git a/exports/Makefile b/exports/Makefile
index 6502d5d01..64d7e181f 100644
--- a/exports/Makefile
+++ b/exports/Makefile
@@ -18,6 +18,10 @@ ifndef NO_LAPACKE
NO_LAPACKE = 0
endif
+ifndef NEED2UNDERSCORES
+NEED2UNDERSCORES=0
+endif
+
ifeq ($(OSNAME), WINNT)
ifeq ($(F_COMPILER), GFORTRAN)
EXTRALIB += -lgfortran
@@ -89,18 +93,18 @@ else
endif
libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
- $(CC) $(CFLAGS) libgoto2_shared.def -shared -o $(@F) \
+ $(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)
libopenblas.def : gensymbol
- perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
+ perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
libgoto2_shared.def : gensymbol
- perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
+ perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
libgoto_hpl.def : gensymbol
- perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
+ perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
@@ -116,14 +120,14 @@ ifeq ($(OSNAME), Linux)
so : ../$(LIBSONAME)
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
- $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
+ $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
-Wl,--retain-symbols-file=linux.def -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
ifneq ($(C_COMPILER), LSB)
- $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
+ $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
else
#Use FC on LSB
- $(FC) $(FFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
+ $(FC) $(FFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
endif
rm -f linktest
@@ -135,10 +139,10 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
so : ../$(LIBSONAME)
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
- $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
+ $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
-Wl,--retain-symbols-file=linux.def $(FEXTRALIB) $(EXTRALIB)
- $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
+ $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
rm -f linktest
endif
@@ -148,15 +152,15 @@ ifeq ($(OSNAME), OSF1)
so : ../$(LIBSONAME)
../$(LIBSONAME) :
- $(CC) -shared -o ../$(LIBSONAME) ../$(LIBNAME)
+ $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME)
endif
ifeq ($(OSNAME), SunOS)
so : ../$(LIBSONAME)
- $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \
+ $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB)
- $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
+ $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
rm -f linktest
endif
@@ -187,23 +191,23 @@ static : ../$(LIBNAME)
rm -f goto.$(SUFFIX)
linux.def : gensymbol ../Makefile.system ../getarch.c
- perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
+ perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
osx.def : gensymbol ../Makefile.system ../getarch.c
- perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
+ perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
aix.def : gensymbol ../Makefile.system ../getarch.c
- perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
+ perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)
symbol.S : gensymbol
- perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S
+ perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S
test : linktest.c
- $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
+ $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
rm -f linktest
linktest.c : gensymbol ../Makefile.system ../getarch.c
- perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c
+ perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c
clean ::
@rm -f *.def *.dylib __.SYMDEF*
diff --git a/exports/gensymbol b/exports/gensymbol
index 0a9729a6c..707641291 100644
--- a/exports/gensymbol
+++ b/exports/gensymbol
@@ -114,8 +114,8 @@
# ALLAUX -- Auxiliary routines called from all precisions
# already provided by @blasobjs: xerbla, lsame
- ilaenv, ieeeck, lsamen, xerbla_array, iparmq,
- ilaprec, ilatrans, ilauplo, iladiag, chla_transtype,
+ ilaenv, ieeeck, lsamen, iparmq,
+ ilaprec, ilatrans, ilauplo, iladiag,
ilaver, slamch, slamc3,
# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX.
@@ -2672,12 +2672,25 @@
#LAPACKE_zlagsy_work,
);
+#These functions may need 2 underscores.
+@lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,);
+
if ($ARGV[5] == 1) {
#NO_LAPACK=1
@underscore_objs = (@blasobjs, @misc_underscore_objs);
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
-d "../lapack-3.4.2" || -d "../lapack-netlib") {
- @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
+
+ if ($ARGV[7] == 0){
+ # NEED2UNDERSCORES=0
+ # Don't need 2 underscores
+ @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs);
+ }else{
+ # Need 2 underscores
+ @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
+ @need_2underscore_objs = (@lapack_embeded_underscore_objs);
+ };
+
} else {
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);
}
@@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){
print $objs, $bu, "\n";
}
+ foreach $objs (@need_2underscore_objs) {
+ print $objs, $bu, $bu, "\n";
+ }
+
# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print $objs, "\n";
@@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){
print "_", $objs, $bu, "\n";
}
+ foreach $objs (@need_2underscore_objs) {
+ print "_", $objs, $bu, $bu, "\n";
+ }
+
# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print "_", $objs, "\n";
@@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){
print $objs, $bu, "\n";
}
+ foreach $objs (@need_2underscore_objs) {
+ print $objs, $bu, $bu, "\n";
+ }
+
# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print $objs, "\n";
@@ -2791,6 +2816,17 @@ if ($ARGV[0] eq "win2k"){
print "\t$uppercase=$objs", "_ \@", $count, "\n";
$count ++;
}
+
+ foreach $objs (@need_2underscore_objs) {
+ $uppercase = $objs;
+ $uppercase =~ tr/[a-z]/[A-Z]/;
+ print "\t$objs=$objs","__ \@", $count, "\n";
+ $count ++;
+ print "\t",$objs, "__=$objs","__ \@", $count, "\n";
+ $count ++;
+ print "\t$uppercase=$objs", "__ \@", $count, "\n";
+ $count ++;
+ }
#for misc_common_objs
foreach $objs (@misc_common_objs) {
@@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){
print "\t$uppercase\_ = $objs","_\n";
$count ++;
}
+
+ foreach $objs (@need_2underscore_objs) {
+ $uppercase = $objs;
+ $uppercase =~ tr/[a-z]/[A-Z]/;
+ print "\t$objs=$objs","__ \@", $count, "\n";
+ $count ++;
+ print "\t",$objs, "__=$objs","__ \@", $count, "\n";
+ $count ++;
+ print "\t$uppercase=$objs", "__ \@", $count, "\n";
+ $count ++;
+ }
+
exit(0);
}
@@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){
print "_", $uppercase, "_:\n";
print "\tjmp\t_", $objs, "_\n";
}
+
+ foreach $objs (@need_2underscore_objs) {
+ $uppercase = $objs;
+ $uppercase =~ tr/[a-z]/[A-Z]/;
+ print "\t.align 16\n";
+ print "\t.globl _", $uppercase, "__\n";
+ print "_", $uppercase, "__:\n";
+ print "\tjmp\t_", $objs, "__\n";
+ }
+
exit(0);
}
@@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){
foreach $objs (@underscore_objs) {
print $objs, $bu, "();\n" if $objs ne "xerbla";
}
+
+ foreach $objs (@need_2underscore_objs) {
+ print $objs, $bu, $bu, "();\n";
+ }
+
# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print $objs, "();\n";
diff --git a/f_check b/f_check
index d7c0b2328..86f1fa689 100644
--- a/f_check
+++ b/f_check
@@ -114,6 +114,12 @@ if ($compiler eq "") {
$vendor = IBM;
$openmp = "-openmp";
}
+
+ # For names with an embedded underscore, e.g. zho_ge, the compiler may append 2 underscores.
+ $data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
+ if ($data =~ /zho_ge__/) {
+ $need2bu = 1;
+ }
}
if ($vendor eq "") {
@@ -245,6 +251,8 @@ if ($link ne "") {
$link =~ s/\-rpath\s+/\-rpath\@/g;
+ $link =~ s/\-rpath-link\s+/\-rpath-link\@/g;
+
@flags = split(/[\s\,\n]/, $link);
# remove leading and trailing quotes from each flag.
@flags = map {s/^['"]|['"]$//g; $_} @flags;
@@ -265,7 +273,15 @@ if ($link ne "") {
$linker_L .= "-Wl,". $flags . " ";
}
- if ($flags =~ /^\-rpath/) {
+ if ($flags =~ /^\-rpath\@/) {
+ $flags =~ s/\@/\,/g;
+ if ($vendor eq "PGI") {
+ $flags =~ s/lib$/libso/;
+ }
+ $linker_L .= "-Wl,". $flags . " " ;
+ }
+
+ if ($flags =~ /^\-rpath-link\@/) {
$flags =~ s/\@/\,/g;
if ($vendor eq "PGI") {
$flags =~ s/lib$/libso/;
@@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1;
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne "";
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne "";
+print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne "";
+
+print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne "";
if (($linker_l ne "") || ($linker_a ne "")) {
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n";
diff --git a/ftest3.f b/ftest3.f
new file mode 100644
index 000000000..8f2cd332f
--- /dev/null
+++ b/ftest3.f
@@ -0,0 +1,6 @@
+ double complex function zho_ge()
+
+ zho_ge = (0.0d0,0.0d0)
+
+ return
+ end
diff --git a/getarch.c b/getarch.c
index 3ffda6244..ec9bb75a8 100644
--- a/getarch.c
+++ b/getarch.c
@@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "OPTERON"
#endif
-#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
+#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
@@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "BOBCAT"
#endif
-#if defined (FORCE_BULLDOZER)
+#if 0
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
@@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "BULLDOZER"
#endif
-#if defined (FORCE_PILEDRIVER)
+#if 0
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
diff --git a/getarch_2nd.c b/getarch_2nd.c
index fc800cfac..0b140bba4 100644
--- a/getarch_2nd.c
+++ b/getarch_2nd.c
@@ -8,7 +8,7 @@
int main(int argc, char **argv) {
- if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) {
+ if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
diff --git a/interface/trtri.c b/interface/trtri.c
index 0564bc183..007dbd7fa 100644
--- a/interface/trtri.c
+++ b/interface/trtri.c
@@ -60,7 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *
};
#endif
-extern void dtrtri_lapack_(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info);
+extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info);
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
@@ -137,7 +137,10 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
// double trtri_U single thread error
// call dtrtri from lapack for a walk around.
if(uplo==0){
- dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info);
+ BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info);
+#ifndef PPC440
+ blas_memory_free(buffer);
+#endif
return 0;
}
#endif
diff --git a/lapack/trtri/dtrtri_lapack.f b/lapack/trtri/dtrtri_lapack.f
index 31a880f76..8e9a08170 100644
--- a/lapack/trtri/dtrtri_lapack.f
+++ b/lapack/trtri/dtrtri_lapack.f
@@ -107,7 +107,7 @@
*> \ingroup doubleOTHERcomputational
*
* =====================================================================
- SUBROUTINE DTRTRI_LAPACK( UPLO, DIAG, N, A, LDA, INFO )
+ SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO )
*
* -- LAPACK computational routine (version 3.4.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
diff --git a/test/Makefile b/test/Makefile
index 2df499b11..0bc06e85f 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -88,7 +88,7 @@ else
endif
endif
-FLDFLAGS = $(FFLAGS:-fPIC=)
+FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
CEXTRALIB =