diff options
author | Zhang Xianyi <traits.zhang@gmail.com> | 2013-08-01 23:53:12 +0800 |
---|---|---|
committer | Zhang Xianyi <traits.zhang@gmail.com> | 2013-08-01 23:53:12 +0800 |
commit | 835293cc1afde82544b2d24780213aabe82ad788 (patch) | |
tree | 5b1c3b007251840dffe55fee3137ea395c7dbf00 | |
parent | f26b7a08aa6e3fa2a060fc902a334141e22f02d4 (diff) | |
parent | b736aa811089fddf586188447f4e25c77df3b103 (diff) | |
download | openblas-0.2.8.tar.gz openblas-0.2.8.tar.bz2 openblas-0.2.8.zip |
Merge branch 'hotfix-v0.2.8' (tag: v0.2.8)
-rw-r--r-- | CONTRIBUTORS.md | 4 | ||||
-rw-r--r-- | Changelog.txt | 12 | ||||
-rw-r--r-- | Makefile.generic | 5 | ||||
-rw-r--r-- | Makefile.power | 11 | ||||
-rw-r--r-- | Makefile.rule | 2 | ||||
-rw-r--r-- | Makefile.sparc | 1 | ||||
-rw-r--r-- | Makefile.system | 7 | ||||
-rw-r--r-- | Makefile.x86 | 3 | ||||
-rw-r--r-- | Makefile.x86_64 | 13 | ||||
-rw-r--r-- | common_alpha.h | 10 | ||||
-rw-r--r-- | common_ia64.h | 9 | ||||
-rw-r--r-- | common_mips64.h | 9 | ||||
-rw-r--r-- | common_sparc.h | 11 | ||||
-rw-r--r-- | common_x86.h | 4 | ||||
-rw-r--r-- | common_x86_64.h | 5 | ||||
-rw-r--r-- | cpuid.h | 8 | ||||
-rw-r--r-- | cpuid_x86.c | 2 | ||||
-rw-r--r-- | ctest/Makefile | 2 | ||||
-rw-r--r-- | driver/others/dynamic.c | 8 | ||||
-rw-r--r-- | driver/others/init.c | 3 | ||||
-rw-r--r-- | exports/Makefile | 40 | ||||
-rw-r--r-- | exports/gensymbol | 69 | ||||
-rw-r--r-- | f_check | 21 | ||||
-rw-r--r-- | ftest3.f | 6 | ||||
-rw-r--r-- | getarch.c | 6 | ||||
-rw-r--r-- | getarch_2nd.c | 2 | ||||
-rw-r--r-- | interface/trtri.c | 7 | ||||
-rw-r--r-- | lapack/trtri/dtrtri_lapack.f | 2 | ||||
-rw-r--r-- | test/Makefile | 2 |
29 files changed, 202 insertions, 82 deletions
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 4a13bcc2c..59df77609 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -79,5 +79,9 @@ In chronological order: * [2013-07-11] create openblas_get_parallel to retrieve information which parallelization model is used by OpenBLAS. +* Sébastien Fabbro <bicatali@gentoo.org> + * [2013-07-24] Modify makefile to respect user's LDFLAGS + * [2013-07-24] Add stack markings for GNU as arch-independent for assembler files + * [Your name or handle] <[email or website]> * [Date] [Brief summary of your changes] diff --git a/Changelog.txt b/Changelog.txt index 574f46226..dd186b683 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,5 +1,17 @@ OpenBLAS ChangeLog ==================================================================== +Version 0.2.8 +01-Aug-2013 +common: + * Support Open64 5.0. (#266) + * Add executable stack markings. (#262, Thank Sébastien Fabbro) + * Respect user's LDFLAGS (Thank Sébastien Fabbro) + +x86/x86-64: + * Rollback bulldozer and piledriver kernels to barcelona kernels (#263) + We will fix the compuational error bug in bulldozer and piledriver kernels. 
+ +==================================================================== Version 0.2.7 20-Jul-2013 common: diff --git a/Makefile.generic b/Makefile.generic index 770aaf850..a5e50b1f0 100644 --- a/Makefile.generic +++ b/Makefile.generic @@ -1,6 +1 @@ COPT = -Wall -O2 # -DGEMMTEST -ifdef BINARY64 -else -# LDFLAGS = -m elf32ppc -LDFLAGS = -m elf_i386 -endif diff --git a/Makefile.power b/Makefile.power index 35eb2cb7b..c6d6aeb50 100644 --- a/Makefile.power +++ b/Makefile.power @@ -17,13 +17,7 @@ endif endif ifdef BINARY64 -ifeq ($(OSNAME), Linux) -LDFLAGS = -m elf64ppc -endif -ifeq ($(OSNAME), Darwin) -LDFLAGS = -arch ppc64 -endif ifeq ($(OSNAME), AIX) CCOMMON_OPT += -mpowerpc64 -maix64 @@ -34,17 +28,12 @@ ifeq ($(COMPILER_F77), xlf) FCOMMON_OPT += -q64 endif ARFLAGS = -X 64 -LDFLAGS = -b64 ASFLAGS = -a64 endif else -ifeq ($(OSNAME), Linux) -LDFLAGS = -m elf32ppc -endif ifeq ($(OSNAME), AIX) CCOMMON_OPT += -Wa,-a32 ARFLAGS = -X 32 -LDFLAGS = -b32 ASFLAGS = -a32 endif endif diff --git a/Makefile.rule b/Makefile.rule index fb377c377..a92eb500a 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.7 +VERSION = 0.2.8 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. 
Meanwhile, the soname in shared library diff --git a/Makefile.sparc b/Makefile.sparc index c2b878e73..c58c77e1a 100644 --- a/Makefile.sparc +++ b/Makefile.sparc @@ -10,7 +10,6 @@ endif ifeq ($(COMPILER_F77), f90) FCOMMON_OPT += -xarch=v9 endif -LDFLAGS = -64 else CCOMMON_OPT += -mcpu=v9 diff --git a/Makefile.system b/Makefile.system index b1f9ba514..727b08960 100644 --- a/Makefile.system +++ b/Makefile.system @@ -324,14 +324,16 @@ ifeq ($(ARCH), x86) DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif ifeq ($(ARCH), x86_64) DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif @@ -895,6 +897,7 @@ export CC export FC export BU export FU +export NEED2UNDERSCORES export USE_THREAD export NUM_THREADS export NUM_CORES diff --git a/Makefile.x86 b/Makefile.x86 index 94ca7c4a7..cd7cc9f90 100644 --- a/Makefile.x86 +++ b/Makefile.x86 @@ -1,8 +1,5 @@ # COMPILER_PREFIX = mingw32- -ifeq ($(OSNAME), Linux) -LDFLAGS = -melf_i386 -endif ifeq ($(OSNAME), Interix) ARFLAGS = -m x86 diff --git a/Makefile.x86_64 b/Makefile.x86_64 index b939e5459..c8d4b237b 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -2,25 +2,12 @@ ifeq ($(OSNAME), SunOS) ifdef BINARY64 -LDFLAGS = -64 ifeq ($(F_COMPILER), SUN) FCOMMON_OPT += -m64 endif endif endif -ifeq ($(OSNAME), FreeBSD) -LDFLAGS = -m elf_x86_64_fbsd -endif - -ifeq ($(OSNAME), Linux) -LDFLAGS = -m elf_x86_64 -endif - -ifeq ($(OSNAME), Darwin) -LDFLAGS = -endif - ifeq ($(OSNAME), Interix) ARFLAGS = -m x64 endif diff --git a/common_alpha.h b/common_alpha.h index cf794739c..3b46c74ce 100644 --- a/common_alpha.h +++ b/common_alpha.h @@ 
-150,9 +150,17 @@ REALNAME: #define PROFCODE .prologue 0 #endif +#if defined(__linux__) && defined(__ELF__) +#define GNUSTACK .section .note.GNU-stack,"",%progbits +#else +#define GNUSTACK +#endif + #define EPILOGUE \ .end REALNAME; \ - .ident VERSION + .ident VERSION; \ + GNUSTACK + #endif #ifdef DOUBLE diff --git a/common_ia64.h b/common_ia64.h index 81939cc1b..3530e0b1d 100644 --- a/common_ia64.h +++ b/common_ia64.h @@ -379,8 +379,15 @@ REALNAME: #define PROFCODE #endif +#if defined(__linux__) && defined(__ELF__) +#define GNUSTACK .section .note.GNU-stack,"",%progbits +#else +#define GNUSTACK +#endif + #define EPILOGUE \ - .endp REALNAME + .endp REALNAME ; \ + GNUSTACK #define START_ADDRESS 0x20000fc800000000UL diff --git a/common_mips64.h b/common_mips64.h index 1bee69407..c08fb2c75 100644 --- a/common_mips64.h +++ b/common_mips64.h @@ -235,10 +235,17 @@ REALNAME: ;\ .set noreorder ;\ .set nomacro +#if defined(__linux__) && defined(__ELF__) +#define GNUSTACK .section .note.GNU-stack,"",%progbits +#else +#define GNUSTACK +#endif + #define EPILOGUE \ .set macro ;\ .set reorder ;\ - .end REALNAME + .end REALNAME ;\ + GNUSTACK #define PROFCODE #endif diff --git a/common_sparc.h b/common_sparc.h index 35d8bdb5f..cfd27f768 100644 --- a/common_sparc.h +++ b/common_sparc.h @@ -199,8 +199,17 @@ static __inline int blas_quickdivide(blasint x, blasint y){ .type REALNAME, #function; \ .proc 07; \ REALNAME:; + +#if defined(__linux__) && defined(__ELF__) +#define GNUSTACK .section .note.GNU-stack,"",%progbits +#else +#define GNUSTACK +#endif + #define EPILOGUE \ - .size REALNAME, .-REALNAME + .size REALNAME, .-REALNAME; \ + GNUSTACK + #endif #endif diff --git a/common_x86.h b/common_x86.h index 5f56839f8..48517d900 100644 --- a/common_x86.h +++ b/common_x86.h @@ -301,7 +301,9 @@ REALNAME: #define PROFCODE #endif -#define EPILOGUE .size REALNAME, .-REALNAME +#define EPILOGUE \ + .size REALNAME, .-REALNAME; \ + .section .note.GNU-stack,"",%progbits #endif diff --git 
a/common_x86_64.h b/common_x86_64.h index 8f9f73680..188903848 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -372,7 +372,10 @@ REALNAME: #define PROFCODE #endif -#define EPILOGUE .size REALNAME, .-REALNAME +#define EPILOGUE \ + .size REALNAME, .-REALNAME; \ + .section .note.GNU-stack,"",%progbits + #endif @@ -105,8 +105,8 @@ #define CORE_NANO 19 #define CORE_SANDYBRIDGE 20 #define CORE_BOBCAT 21 -#define CORE_BULLDOZER 22 -#define CORE_PILEDRIVER 23 +#define CORE_BULLDOZER CORE_BARCELONA +#define CORE_PILEDRIVER CORE_BARCELONA #define CORE_HASWELL CORE_SANDYBRIDGE #define HAVE_SSE (1 << 0) @@ -198,8 +198,8 @@ typedef struct { #define CPUTYPE_NANO 43 #define CPUTYPE_SANDYBRIDGE 44 #define CPUTYPE_BOBCAT 45 -#define CPUTYPE_BULLDOZER 46 -#define CPUTYPE_PILEDRIVER 47 +#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA +#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA // this define is because BLAS doesn't have haswell specific optimizations yet #define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE diff --git a/cpuid_x86.c b/cpuid_x86.c index 98af9d041..9e850a2aa 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -229,8 +229,8 @@ int get_cputype(int gettype){ if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2; #ifndef NO_AVX if (support_avx()) feature |= HAVE_AVX; + if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; #endif - if ((ecx & (1 << 20)) != 0) feature |= HAVE_FMA3; if (have_excpuid() >= 0x01) { cpuid(0x80000001, &eax, &ebx, &ecx, &edx); diff --git a/ctest/Makefile b/ctest/Makefile index b1295640f..099116895 100644 --- a/ctest/Makefile +++ b/ctest/Makefile @@ -77,7 +77,7 @@ endif clean :: rm -f x* -FLDFLAGS = $(FFLAGS:-fPIC=) +FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS) CEXTRALIB = # Single real diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 197cc2b2d..bf60efb19 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA; extern gotoblas_t gotoblas_BOBCAT; #ifndef NO_AVX extern gotoblas_t 
gotoblas_SANDYBRIDGE; -extern gotoblas_t gotoblas_BULLDOZER; -extern gotoblas_t gotoblas_PILEDRIVER; +//extern gotoblas_t gotoblas_BULLDOZER; +//extern gotoblas_t gotoblas_PILEDRIVER; #else //Use NEHALEM kernels for sandy bridge #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM +#endif + #define gotoblas_BULLDOZER gotoblas_BARCELONA #define gotoblas_PILEDRIVER gotoblas_BARCELONA -#endif + //Use sandy bridge kernels for haswell. #define gotoblas_HASWELL gotoblas_SANDYBRIDGE diff --git a/driver/others/init.c b/driver/others/init.c index 4efc2816a..5da71cec1 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -83,6 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <dirent.h> #include <dlfcn.h> #include <unistd.h> +#include <string.h> #define MAX_NODES 16 #define MAX_CPUS 256 @@ -315,7 +316,7 @@ static int numa_check(void) { } while ((dir = readdir(dp)) != NULL) { - if (*(unsigned int *) dir -> d_name == 0x065646f6eU) { + if (strncmp(dir->d_name, "node", 4)==0) { node = atoi(&dir -> d_name[4]); diff --git a/exports/Makefile b/exports/Makefile index 6502d5d01..64d7e181f 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -18,6 +18,10 @@ ifndef NO_LAPACKE NO_LAPACKE = 0 endif +ifndef NEED2UNDERSCORES +NEED2UNDERSCORES=0 +endif + ifeq ($(OSNAME), WINNT) ifeq ($(F_COMPILER), GFORTRAN) EXTRALIB += -lgfortran @@ -89,18 +93,18 @@ else endif libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def - $(CC) $(CFLAGS) libgoto2_shared.def -shared -o $(@F) \ + $(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB) libopenblas.def : gensymbol - perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) libgoto2_shared.def : gensymbol - perl ./gensymbol 
win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) libgoto_hpl.def : gensymbol - perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) $(LIBDYNNAME) : ../$(LIBNAME) osx.def $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) @@ -116,14 +120,14 @@ ifeq ($(OSNAME), Linux) so : ../$(LIBSONAME) ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c - $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--retain-symbols-file=linux.def -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) ifneq ($(C_COMPILER), LSB) - $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. + $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. else #Use FC on LSB - $(FC) $(FFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. + $(FC) $(FFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. endif rm -f linktest @@ -135,10 +139,10 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD)) so : ../$(LIBSONAME) ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c - $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--retain-symbols-file=linux.def $(FEXTRALIB) $(EXTRALIB) - $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. 
+ $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. rm -f linktest endif @@ -148,15 +152,15 @@ ifeq ($(OSNAME), OSF1) so : ../$(LIBSONAME) ../$(LIBSONAME) : - $(CC) -shared -o ../$(LIBSONAME) ../$(LIBNAME) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME) endif ifeq ($(OSNAME), SunOS) so : ../$(LIBSONAME) - $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB) - $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. + $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. rm -f linktest endif @@ -187,23 +191,23 @@ static : ../$(LIBNAME) rm -f goto.$(SUFFIX) linux.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) osx.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) aix.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) symbol.S : gensymbol - perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S + perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S test : linktest.c - $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. 
+ $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. rm -f linktest linktest.c : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c + perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c clean :: @rm -f *.def *.dylib __.SYMDEF* diff --git a/exports/gensymbol b/exports/gensymbol index 0a9729a6c..707641291 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -114,8 +114,8 @@ # ALLAUX -- Auxiliary routines called from all precisions # already provided by @blasobjs: xerbla, lsame - ilaenv, ieeeck, lsamen, xerbla_array, iparmq, - ilaprec, ilatrans, ilauplo, iladiag, chla_transtype, + ilaenv, ieeeck, lsamen, iparmq, + ilaprec, ilatrans, ilauplo, iladiag, ilaver, slamch, slamc3, # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. @@ -2672,12 +2672,25 @@ #LAPACKE_zlagsy_work, ); +#These function may need 2 underscores. 
+@lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,); + if ($ARGV[5] == 1) { #NO_LAPACK=1 @underscore_objs = (@blasobjs, @misc_underscore_objs); } elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" || -d "../lapack-3.4.2" || -d "../lapack-netlib") { - @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); + + if ($ARGV[7] == 0){ + # NEED2UNDERSCORES=0 + # Don't need 2 underscores + @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs); + }else{ + # Need 2 underscores + @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); + @need_2underscore_objs = (@lapack_embeded_underscore_objs); + }; + } else { @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); } @@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){ print $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print $objs, "\n"; @@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){ print "_", $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print "_", $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print "_", $objs, "\n"; @@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){ print $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print $objs, "\n"; @@ -2791,6 +2816,17 @@ if ($ARGV[0] eq "win2k"){ print "\t$uppercase=$objs", "_ \@", $count, "\n"; $count ++; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t$objs=$objs","__ \@", $count, "\n"; + $count ++; + print "\t",$objs, "__=$objs","__ \@", $count, "\n"; + $count ++; + print "\t$uppercase=$objs", "__ \@", $count, "\n"; + $count ++; + } #for misc_common_objs foreach $objs 
(@misc_common_objs) { @@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){ print "\t$uppercase\_ = $objs","_\n"; $count ++; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t$objs=$objs","__ \@", $count, "\n"; + $count ++; + print "\t",$objs, "__=$objs","__ \@", $count, "\n"; + $count ++; + print "\t$uppercase=$objs", "__ \@", $count, "\n"; + $count ++; + } + exit(0); } @@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){ print "_", $uppercase, "_:\n"; print "\tjmp\t_", $objs, "_\n"; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t.align 16\n"; + print "\t.globl _", $uppercase, "__\n"; + print "_", $uppercase, "__:\n"; + print "\tjmp\t_", $objs, "__\n"; + } + exit(0); } @@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){ foreach $objs (@underscore_objs) { print $objs, $bu, "();\n" if $objs ne "xerbla"; } + + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "();\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print $objs, "();\n"; @@ -114,6 +114,12 @@ if ($compiler eq "") { $vendor = IBM; $openmp = "-openmp"; } + + # for embeded underscore name, e.g. zho_ge, it may append 2 underscores. + $data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`; + if ($data =~ /zho_ge__/) { + $need2bu = 1; + } } if ($vendor eq "") { @@ -245,6 +251,8 @@ if ($link ne "") { $link =~ s/\-rpath\s+/\-rpath\@/g; + $link =~ s/\-rpath-link\s+/\-rpath-link\@/g; + @flags = split(/[\s\,\n]/, $link); # remove leading and trailing quotes from each flag. @flags = map {s/^['"]|['"]$//g; $_} @flags; @@ -265,7 +273,15 @@ if ($link ne "") { $linker_L .= "-Wl,". $flags . " "; } - if ($flags =~ /^\-rpath/) { + if ($flags =~ /^\-rpath\@/) { + $flags =~ s/\@/\,/g; + if ($vendor eq "PGI") { + $flags =~ s/lib$/libso/; + } + $linker_L .= "-Wl,". $flags . 
" " ; + } + + if ($flags =~ /^\-rpath-link\@/) { $flags =~ s/\@/\,/g; if ($vendor eq "PGI") { $flags =~ s/lib$/libso/; @@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1; print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne ""; print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne ""; +print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne ""; + +print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne ""; if (($linker_l ne "") || ($linker_a ne "")) { print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n"; diff --git a/ftest3.f b/ftest3.f new file mode 100644 index 000000000..8f2cd332f --- /dev/null +++ b/ftest3.f @@ -0,0 +1,6 @@ + double complex function zho_ge() + + zho_ge = (0.0d0,0.0d0) + + return + end @@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "OPTERON" #endif -#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) +#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER) #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "BOBCAT" #endif -#if defined (FORCE_BULLDOZER) +#if 0 #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" @@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define CORENAME "BULLDOZER" #endif -#if defined (FORCE_PILEDRIVER) +#if 0 #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" diff --git a/getarch_2nd.c b/getarch_2nd.c index fc800cfac..0b140bba4 100644 --- a/getarch_2nd.c +++ b/getarch_2nd.c @@ -8,7 +8,7 @@ int main(int argc, char **argv) { - if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) { + if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) { printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M); printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N); printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M); diff --git a/interface/trtri.c b/interface/trtri.c index 0564bc183..007dbd7fa 100644 --- a/interface/trtri.c +++ b/interface/trtri.c @@ -60,7 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT * }; #endif -extern void dtrtri_lapack_(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); +extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ @@ -137,7 +137,10 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In // double trtri_U single thread error // call dtrtri from lapack for a walk around. 
if(uplo==0){ - dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info); + BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info); +#ifndef PPC440 + blas_memory_free(buffer); +#endif return 0; } #endif diff --git a/lapack/trtri/dtrtri_lapack.f b/lapack/trtri/dtrtri_lapack.f index 31a880f76..8e9a08170 100644 --- a/lapack/trtri/dtrtri_lapack.f +++ b/lapack/trtri/dtrtri_lapack.f @@ -107,7 +107,7 @@ *> \ingroup doubleOTHERcomputational * * ===================================================================== - SUBROUTINE DTRTRI_LAPACK( UPLO, DIAG, N, A, LDA, INFO ) + SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO ) * * -- LAPACK computational routine (version 3.4.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- diff --git a/test/Makefile b/test/Makefile index 2df499b11..0bc06e85f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -88,7 +88,7 @@ else endif endif -FLDFLAGS = $(FFLAGS:-fPIC=) +FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS) CEXTRALIB = |