summary refs log tree commit diff
diff options
context:
space:
mode:
author wernsaar <wernsaar@googlemail.com> 2013-12-01 13:16:41 +0100
committer wernsaar <wernsaar@googlemail.com> 2013-12-01 13:16:41 +0100
commit 4be4db590c14963410508e96833b9b47c6c5d586 (patch)
tree e8fb012d1f5696401ca5de35643eedc5e3b6edfe
parent 9d3fae15a8dba680c6f3369cd580c59b651201dd (diff)
parent 5048a80032a9d020b585ffc59323274f1d14e6b7 (diff)
download openblas-4be4db590c14963410508e96833b9b47c6c5d586.tar.gz
openblas-4be4db590c14963410508e96833b9b47c6c5d586.tar.bz2
openblas-4be4db590c14963410508e96833b9b47c6c5d586.zip
Merge remote branch 'origin/develop' into armv7
-rw-r--r-- Makefile 8
-rw-r--r-- Makefile.install 4
-rw-r--r-- Makefile.system 14
-rw-r--r-- Makefile.tail 3
-rw-r--r-- driver/others/blas_server_win32.c 5
-rw-r--r-- driver/others/memory.c 8
-rw-r--r-- exports/Makefile 11
-rw-r--r-- interface/trtri.c 13
-rw-r--r-- kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S 16
-rw-r--r-- kernel/x86_64/dtrsm_kernel_RN_8x2_bulldozer.S 16
-rw-r--r-- lapack-netlib/lapacke/include/lapacke_config.h 4
-rw-r--r-- lapack/getrf/getrf_parallel.c 14
-rw-r--r-- lapack/getrf/getrf_parallel_omp.c 2
-rw-r--r-- lapack/getrf/getrf_single.c 2
-rw-r--r-- lapack/potrf/potrf_parallel.c 2
-rw-r--r-- lapack/trtri/Makefile 4
-rw-r--r-- lapack/trtri/dtrtri_lapack.f 242
-rw-r--r-- lapack/trtri/trtri_U_single.c 19
-rw-r--r-- openblas_config_template.h 16
19 files changed, 87 insertions, 316 deletions
diff --git a/Makefile b/Makefile
index 67d425359..294502f81 100644
--- a/Makefile
+++ b/Makefile
@@ -219,10 +219,10 @@ prof_lapack : lapack_prebuild
lapack_prebuild :
ifndef NOFORTRAN
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
- -@echo "OPTS = $(FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
- -@echo "POPTS = $(FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
- -@echo "NOOPT = $(FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
- -@echo "PNOOPT = $(FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
+ -@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
+ -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
+ -@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
+ -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
diff --git a/Makefile.install b/Makefile.install
index cbe98bc5b..8319b46db 100644
--- a/Makefile.install
+++ b/Makefile.install
@@ -23,8 +23,8 @@ install : lib.grd
#for inc
@echo \#ifndef OPENBLAS_CONFIG_H > $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_CONFIG_H >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
- @cat config_last.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
- @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
+ @awk '{print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
+ @echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@cat openblas_config_template.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h
diff --git a/Makefile.system b/Makefile.system
index 5545de1b1..d6c172f3d 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -222,6 +222,11 @@ endif
endif
endif
+# ifeq logical or
+ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
+OS_WINDOWS=1
+endif
+
ifdef QUAD_PRECISION
CCOMMON_OPT += -DQUAD_PRECISION
NO_EXPRECISION = 1
@@ -477,10 +482,8 @@ CCOMMON_OPT += -DF_INTERFACE_GFORT
FCOMMON_OPT += -Wall
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
ifneq ($(NO_LAPACK), 1)
-ifneq ($(C_COMPILER), LSB)
EXTRALIB += -lgfortran
endif
-endif
ifdef NO_BINARY_MODE
ifeq ($(ARCH), mips64)
ifdef BINARY64
@@ -861,11 +864,18 @@ override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
+#For LAPACK Fortran codes.
+LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS))
+LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS))
+
LAPACK_CFLAGS = $(CFLAGS)
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
ifdef INTERFACE64
LAPACK_CFLAGS += -DLAPACK_ILP64
endif
+ifdef OS_WINDOWS
+LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS
+endif
ifeq ($(C_COMPILER), LSB)
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE
endif
diff --git a/Makefile.tail b/Makefile.tail
index 53dd0caad..56f8d820c 100644
--- a/Makefile.tail
+++ b/Makefile.tail
@@ -606,7 +606,8 @@ clean ::
@if test -d $(ARCH); then \
(cd $(ARCH) && $(MAKE) clean) \
fi
- @rm -rf *.a *.s *.o *.po *.obj *.i *.so core core.* gmon.out *.cso \
+ @find . -name '*.o' | xargs rm -rf
+ @rm -rf *.a *.s *.po *.obj *.i *.so core core.* gmon.out *.cso \
*.csx *.is *~ *.exe *.flame *.pdb *.dwf \
gen_insn_flash.c gen_insn_flash *.stackdump *.dll *.exp *.lib \
*.pc *.pcl *.def *.i *.prof linktest.c \
diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c
index bd1069c5e..8723a6fa7 100644
--- a/driver/others/blas_server_win32.c
+++ b/driver/others/blas_server_win32.c
@@ -441,9 +441,10 @@ int BLASFUNC(blas_thread_shutdown)(void){
if (blas_server_avail){
SetEvent(pool.killed);
-
+ printf("blas_num_threads=%d\n", blas_num_threads);
for(i = 0; i < blas_num_threads - 1; i++){
- WaitForSingleObject(blas_threads[i], INFINITE);
+ WaitForSingleObject(blas_threads[i], 5); //INFINITE);
+ TerminateThread(blas_threads[i],0);
}
blas_server_avail = 0;
diff --git a/driver/others/memory.c b/driver/others/memory.c
index 4f35691ff..35758d13c 100644
--- a/driver/others/memory.c
+++ b/driver/others/memory.c
@@ -363,7 +363,7 @@ static void *alloc_mmap(void *address){
#define BENCH_ITERATION 4
#define SCALING 2
-static inline BLASULONG run_bench(BLASULONG address, long size) {
+static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
BLASULONG original, *p;
BLASULONG start, stop, min;
@@ -450,12 +450,12 @@ static void *alloc_mmap(void *address){
current = (SCALING - 1) * BUFFER_SIZE;
while(current > 0) {
- *(long *)start = (long)start + PAGESIZE;
+ *(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
start += PAGESIZE;
current -= PAGESIZE;
}
- *(long *)(start - PAGESIZE) = (BLASULONG)map_address;
+ *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
start = (BLASULONG)map_address;
@@ -1170,7 +1170,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
- long size;
+ size_t size;
BLASULONG buffer;
size = BUFFER_SIZE - PAGESIZE;
diff --git a/exports/Makefile b/exports/Makefile
index 0bc9ec6e0..8e50a9809 100644
--- a/exports/Makefile
+++ b/exports/Makefile
@@ -111,7 +111,7 @@ libgoto_hpl.def : gensymbol
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F)
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
- $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
+ $(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
symbol.$(SUFFIX) : symbol.S
$(CC) $(CFLAGS) -c -o $(@F) $^
@@ -124,14 +124,17 @@ ifeq ($(OSNAME), Linux)
so : ../$(LIBSONAME)
../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c
+ifneq ($(C_COMPILER), LSB)
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
-Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
-ifneq ($(C_COMPILER), LSB)
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
else
-#Use FC on LSB
- $(FC) $(FFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
+#for LSB
+ env LSBCC_SHAREDLIBS=gfortran $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
+ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \
+ -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB)
+ $(FC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
endif
rm -f linktest
diff --git a/interface/trtri.c b/interface/trtri.c
index 007dbd7fa..5aa5e9b9b 100644
--- a/interface/trtri.c
+++ b/interface/trtri.c
@@ -60,7 +60,6 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *
};
#endif
-extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info);
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
@@ -133,18 +132,6 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
if (args.nthreads == 1) {
#endif
-#if DOUBLE
- // double trtri_U single thread error
- // call dtrtri from lapack for a walk around.
- if(uplo==0){
- BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info);
-#ifndef PPC440
- blas_memory_free(buffer);
-#endif
- return 0;
- }
-#endif
-
*Info = (trtri_single[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
#ifdef SMP
diff --git a/kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S b/kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S
index 374f45096..9e15fa240 100644
--- a/kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S
+++ b/kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S
@@ -103,7 +103,7 @@
vmovups -10*SIZE(AO,%rax,8), %xmm6
vfmaddpd %xmm14, %xmm6 , %xmm1 , %xmm14
vfmaddpd %xmm15, %xmm6 , %xmm2 , %xmm15
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
.macro SOLVE_8x2
@@ -265,7 +265,7 @@
vmovups -14*SIZE(AO,%rax,4), %xmm0
vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10
vfmaddpd %xmm11, %xmm0 , %xmm2 , %xmm11
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
@@ -338,7 +338,7 @@
vmovups -16*SIZE(AO,%rax,2), %xmm0
vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8
vfmaddpd %xmm9 , %xmm0 , %xmm2 , %xmm9
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
@@ -378,7 +378,7 @@
vmovups -16*SIZE(BO,%rax,2), %xmm1
vmovddup -16*SIZE(AO,%rax,1), %xmm0
vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
.macro SOLVE_1x2
@@ -411,7 +411,7 @@
vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10
vmovups -10*SIZE(AO,%rax,8), %xmm0
vfmaddpd %xmm11, %xmm0 , %xmm1 , %xmm11
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
.macro SOLVE_8x1
@@ -510,7 +510,7 @@
vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8
vmovups -14*SIZE(AO,%rax,4), %xmm0
vfmaddpd %xmm9 , %xmm0 , %xmm1 , %xmm9
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
@@ -560,7 +560,7 @@
vmovddup -16*SIZE(BO,%rax,1), %xmm1
vmovups -16*SIZE(AO,%rax,2), %xmm0
vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
@@ -592,7 +592,7 @@
vmovsd -16*SIZE(BO,%rax,1), %xmm1
vmovsd -16*SIZE(AO,%rax,1), %xmm0
vfmaddsd %xmm8 , %xmm0 , %xmm1 , %xmm8
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
.macro SOLVE_1x1
diff --git a/kernel/x86_64/dtrsm_kernel_RN_8x2_bulldozer.S b/kernel/x86_64/dtrsm_kernel_RN_8x2_bulldozer.S
index 8fa53efa7..8d3964aee 100644
--- a/kernel/x86_64/dtrsm_kernel_RN_8x2_bulldozer.S
+++ b/kernel/x86_64/dtrsm_kernel_RN_8x2_bulldozer.S
@@ -103,7 +103,7 @@
vmovups -10*SIZE(AO,%rax,8), %xmm6
vfmaddpd %xmm14, %xmm6 , %xmm1 , %xmm14
vfmaddpd %xmm15, %xmm6 , %xmm2 , %xmm15
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
.macro SOLVE_8x2
@@ -177,7 +177,7 @@
vmovups -14*SIZE(AO,%rax,4), %xmm0
vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10
vfmaddpd %xmm11, %xmm0 , %xmm2 , %xmm11
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
@@ -226,7 +226,7 @@
vmovups -16*SIZE(AO,%rax,2), %xmm0
vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8
vfmaddpd %xmm9 , %xmm0 , %xmm2 , %xmm9
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
@@ -262,7 +262,7 @@
vmovups -16*SIZE(BO,%rax,2), %xmm1
vmovddup -16*SIZE(AO,%rax,1), %xmm0
vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
.macro SOLVE_1x2
@@ -306,7 +306,7 @@
vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10
vmovups -10*SIZE(AO,%rax,8), %xmm0
vfmaddpd %xmm11, %xmm0 , %xmm1 , %xmm11
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
.macro SOLVE_8x1
@@ -347,7 +347,7 @@
vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8
vmovups -14*SIZE(AO,%rax,4), %xmm0
vfmaddpd %xmm9 , %xmm0 , %xmm1 , %xmm9
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
@@ -377,7 +377,7 @@
vmovddup -16*SIZE(BO,%rax,1), %xmm1
vmovups -16*SIZE(AO,%rax,2), %xmm0
vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
@@ -402,7 +402,7 @@
vmovsd -16*SIZE(BO,%rax,1), %xmm1
vmovsd -16*SIZE(AO,%rax,1), %xmm0
vfmaddsd %xmm8 , %xmm0 , %xmm1 , %xmm8
- addq $SIZE, %rax
+ addq $ SIZE, %rax
.endm
.macro SOLVE_1x1
diff --git a/lapack-netlib/lapacke/include/lapacke_config.h b/lapack-netlib/lapacke/include/lapacke_config.h
index 1e2509bf0..561b2736b 100644
--- a/lapack-netlib/lapacke/include/lapacke_config.h
+++ b/lapack-netlib/lapacke/include/lapacke_config.h
@@ -45,7 +45,11 @@ extern "C" {
#ifndef lapack_int
#if defined(LAPACK_ILP64)
+#if defined(OPENBLAS_OS_WINDOWS)
+#define lapack_int long long
+#else
#define lapack_int long
+#endif
#else
#define lapack_int int
#endif
diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c
index 21ea9d5f5..3dbc70e9d 100644
--- a/lapack/getrf/getrf_parallel.c
+++ b/lapack/getrf/getrf_parallel.c
@@ -67,14 +67,14 @@ double sqrt(double);
#undef GETRF_FACTOR
#define GETRF_FACTOR 1.00
-static inline long FORMULA1(long M, long N, long IS, long BK, long T) {
+static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) {
double m = (double)(M - IS - BK);
double n = (double)(N - IS - BK);
double b = (double)BK;
double a = (double)T;
- return (long)((n + GETRF_FACTOR * m * b * (1. - a) / (b + m)) / a);
+ return (BLASLONG)((n + GETRF_FACTOR * m * b * (1. - a) / (b + m)) / a);
}
@@ -111,7 +111,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
if (args -> a == NULL) {
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
- sbb = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
+ sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
} else {
sb = (FLOAT *)args -> a;
}
@@ -221,7 +221,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
if (args -> a == NULL) {
TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb);
- sbb = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
+ sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
} else {
sb = (FLOAT *)args -> a;
}
@@ -448,7 +448,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
TRSM_ILTCOPY(bk, bk, a, lda, 0, sb);
- sbb = (FLOAT *)((((long)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
+ sbb = (FLOAT *)((((BLASULONG)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
is = 0;
num_cpu = 0;
@@ -685,7 +685,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (width > n - init_bk) width = n - init_bk;
if (width < init_bk) {
- long temp;
+ BLASLONG temp;
temp = FORMULA2(m, n, 0, init_bk, args -> nthreads);
temp = (temp + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
@@ -708,7 +708,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
is = 0;
num_cpu = 0;
- sbb = (FLOAT *)((((long)(sb + GEMM_PQ * GEMM_PQ * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
+ sbb = (FLOAT *)((((BLASULONG)(sb + GEMM_PQ * GEMM_PQ * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
while (is < mn) {
diff --git a/lapack/getrf/getrf_parallel_omp.c b/lapack/getrf/getrf_parallel_omp.c
index 4922b9b52..6eda30a52 100644
--- a/lapack/getrf/getrf_parallel_omp.c
+++ b/lapack/getrf/getrf_parallel_omp.c
@@ -178,7 +178,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
return info;
}
- sbb = (FLOAT *)((((long)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
+ sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
info = 0;
diff --git a/lapack/getrf/getrf_single.c b/lapack/getrf/getrf_single.c
index fcea0ae89..f1818ea97 100644
--- a/lapack/getrf/getrf_single.c
+++ b/lapack/getrf/getrf_single.c
@@ -82,7 +82,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
return info;
}
- sbb = (FLOAT *)((((long)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
+ sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
info = 0;
diff --git a/lapack/potrf/potrf_parallel.c b/lapack/potrf/potrf_parallel.c
index eec9b6e05..11f7f533c 100644
--- a/lapack/potrf/potrf_parallel.c
+++ b/lapack/potrf/potrf_parallel.c
@@ -185,7 +185,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
- buffer[0] = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
+ buffer[0] = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B);
for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM_Q * div_n * COMPSIZE;
}
diff --git a/lapack/trtri/Makefile b/lapack/trtri/Makefile
index 10d3cb7fd..626c47bbf 100644
--- a/lapack/trtri/Makefile
+++ b/lapack/trtri/Makefile
@@ -13,7 +13,6 @@ ZBLASOBJS = ztrtri_UU_single.$(SUFFIX) ztrtri_UN_single.$(SUFFIX) ztrtri_LU_sing
XBLASOBJS = xtrtri_UU_single.$(SUFFIX) xtrtri_UN_single.$(SUFFIX) xtrtri_LU_single.$(SUFFIX) xtrtri_LN_single.$(SUFFIX)
-DBLASOBJS += dtrtri_lapack.$(SUFFIX)
ifdef SMP
SBLASOBJS += strtri_UU_parallel.$(SUFFIX) strtri_UN_parallel.$(SUFFIX) strtri_LU_parallel.$(SUFFIX) strtri_LN_parallel.$(SUFFIX)
@@ -54,9 +53,6 @@ dtrtri_UU_single.$(SUFFIX) : trtri_U_single.c
dtrtri_UN_single.$(SUFFIX) : trtri_U_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUNIT $< -o $(@F)
-dtrtri_lapack.$(SUFFIX) : dtrtri_lapack.f
- $(FC) -c $(FFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F)
-
dtrtri_LU_single.$(SUFFIX) : trtri_L_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F)
diff --git a/lapack/trtri/dtrtri_lapack.f b/lapack/trtri/dtrtri_lapack.f
deleted file mode 100644
index 8e9a08170..000000000
--- a/lapack/trtri/dtrtri_lapack.f
+++ /dev/null
@@ -1,242 +0,0 @@
-*> \brief \b DTRTRI
-*
-* =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-* http://www.netlib.org/lapack/explore-html/
-*
-*> \htmlonly
-*> Download DTRTRI + dependencies
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrtri.f">
-*> [TGZ]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrtri.f">
-*> [ZIP]</a>
-*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrtri.f">
-*> [TXT]</a>
-*> \endhtmlonly
-*
-* Definition:
-* ===========
-*
-* SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO )
-*
-* .. Scalar Arguments ..
-* CHARACTER DIAG, UPLO
-* INTEGER INFO, LDA, N
-* ..
-* .. Array Arguments ..
-* DOUBLE PRECISION A( LDA, * )
-* ..
-*
-*
-*> \par Purpose:
-* =============
-*>
-*> \verbatim
-*>
-*> DTRTRI computes the inverse of a real upper or lower triangular
-*> matrix A.
-*>
-*> This is the Level 3 BLAS version of the algorithm.
-*> \endverbatim
-*
-* Arguments:
-* ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*> UPLO is CHARACTER*1
-*> = 'U': A is upper triangular;
-*> = 'L': A is lower triangular.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*> DIAG is CHARACTER*1
-*> = 'N': A is non-unit triangular;
-*> = 'U': A is unit triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*> N is INTEGER
-*> The order of the matrix A. N >= 0.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*> A is DOUBLE PRECISION array, dimension (LDA,N)
-*> On entry, the triangular matrix A. If UPLO = 'U', the
-*> leading N-by-N upper triangular part of the array A contains
-*> the upper triangular matrix, and the strictly lower
-*> triangular part of A is not referenced. If UPLO = 'L', the
-*> leading N-by-N lower triangular part of the array A contains
-*> the lower triangular matrix, and the strictly upper
-*> triangular part of A is not referenced. If DIAG = 'U', the
-*> diagonal elements of A are also not referenced and are
-*> assumed to be 1.
-*> On exit, the (triangular) inverse of the original matrix, in
-*> the same storage format.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*> LDA is INTEGER
-*> The leading dimension of the array A. LDA >= max(1,N).
-*> \endverbatim
-*>
-*> \param[out] INFO
-*> \verbatim
-*> INFO is INTEGER
-*> = 0: successful exit
-*> < 0: if INFO = -i, the i-th argument had an illegal value
-*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular
-*> matrix is singular and its inverse can not be computed.
-*> \endverbatim
-*
-* Authors:
-* ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date November 2011
-*
-*> \ingroup doubleOTHERcomputational
-*
-* =====================================================================
- SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO )
-*
-* -- LAPACK computational routine (version 3.4.0) --
-* -- LAPACK is a software package provided by Univ. of Tennessee, --
-* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-* November 2011
-*
-* .. Scalar Arguments ..
- CHARACTER DIAG, UPLO
- INTEGER INFO, LDA, N
-* ..
-* .. Array Arguments ..
- DOUBLE PRECISION A( LDA, * )
-* ..
-*
-* =====================================================================
-*
-* .. Parameters ..
- DOUBLE PRECISION ONE, ZERO
- PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
-* ..
-* .. Local Scalars ..
- LOGICAL NOUNIT, UPPER
- INTEGER J, JB, NB, NN
-* ..
-* .. External Functions ..
- LOGICAL LSAME
- INTEGER ILAENV
- EXTERNAL LSAME, ILAENV
-* ..
-* .. External Subroutines ..
- EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA
-* ..
-* .. Intrinsic Functions ..
- INTRINSIC MAX, MIN
-* ..
-* .. Executable Statements ..
-*
-* Test the input parameters.
-*
- INFO = 0
- UPPER = LSAME( UPLO, 'U' )
- NOUNIT = LSAME( DIAG, 'N' )
- IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
- INFO = -1
- ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
- INFO = -2
- ELSE IF( N.LT.0 ) THEN
- INFO = -3
- ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
- INFO = -5
- END IF
- IF( INFO.NE.0 ) THEN
- CALL XERBLA( 'DTRTRI', -INFO )
- RETURN
- END IF
-*
-* Quick return if possible
-*
- IF( N.EQ.0 )
- $ RETURN
-*
-* Check for singularity if non-unit.
-*
- IF( NOUNIT ) THEN
- DO 10 INFO = 1, N
- IF( A( INFO, INFO ).EQ.ZERO )
- $ RETURN
- 10 CONTINUE
- INFO = 0
- END IF
-*
-* Determine the block size for this environment.
-*
- NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 )
- IF( NB.LE.1 .OR. NB.GE.N ) THEN
-*
-* Use unblocked code
-*
- CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO )
- ELSE
-*
-* Use blocked code
-*
- IF( UPPER ) THEN
-*
-* Compute inverse of upper triangular matrix
-*
- DO 20 J = 1, N, NB
- JB = MIN( NB, N-J+1 )
-*
-* Compute rows 1:j-1 of current block column
-*
- CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1,
- $ JB, ONE, A, LDA, A( 1, J ), LDA )
- CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1,
- $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA )
-*
-* Compute inverse of current diagonal block
-*
- CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO )
- 20 CONTINUE
- ELSE
-*
-* Compute inverse of lower triangular matrix
-*
- NN = ( ( N-1 ) / NB )*NB + 1
- DO 30 J = NN, 1, -NB
- JB = MIN( NB, N-J+1 )
- IF( J+JB.LE.N ) THEN
-*
-* Compute rows j+jb:n of current block column
-*
- CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG,
- $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA,
- $ A( J+JB, J ), LDA )
- CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG,
- $ N-J-JB+1, JB, -ONE, A( J, J ), LDA,
- $ A( J+JB, J ), LDA )
- END IF
-*
-* Compute inverse of current diagonal block
-*
- CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO )
- 30 CONTINUE
- END IF
- END IF
-*
- RETURN
-*
-* End of DTRTRI
-*
- END
diff --git a/lapack/trtri/trtri_U_single.c b/lapack/trtri/trtri_U_single.c
index 72133d896..c79281cfb 100644
--- a/lapack/trtri/trtri_U_single.c
+++ b/lapack/trtri/trtri_U_single.c
@@ -127,8 +127,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (min_i > GEMM_P) min_i = GEMM_P;
if (ls == i + bk) {
- NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
-
+ //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
+
+ GEMM_BETA(min_i, bk, 0, dm1,
+#ifdef COMPLEX
+ ZERO,
+#endif
+ NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
+
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,
@@ -171,8 +177,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
min_i = i - is;
if (min_i > GEMM_P) min_i = GEMM_P;
- NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
-
+ //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
+ GEMM_BETA(min_i, bk, 0, dm1,
+#ifdef COMPLEX
+ ZERO,
+#endif
+ NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
+
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
#ifdef COMPLEX
ZERO,
diff --git a/openblas_config_template.h b/openblas_config_template.h
index 1017caff9..3b3435b0e 100644
--- a/openblas_config_template.h
+++ b/openblas_config_template.h
@@ -1,8 +1,8 @@
/*This is only for "make install" target.*/
-#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
-#define WINDOWS_ABI
-#define OS_WINDOWS
+#if defined(OPENBLAS_OS_WINNT) || defined(OPENBLAS_OS_CYGWIN_NT) || defined(OPENBLAS_OS_INTERIX)
+#define OPENBLAS_WINDOWS_ABI
+#define OPENBLAS_OS_WINDOWS
#ifdef DOUBLE
#define DOUBLE_DEFINED DOUBLE
@@ -10,23 +10,23 @@
#endif
#endif
-#ifdef NEEDBUNDERSCORE
+#ifdef OPENBLAS_NEEDBUNDERSCORE
#define BLASFUNC(FUNC) FUNC##_
#else
#define BLASFUNC(FUNC) FUNC
#endif
-#ifdef QUAD_PRECISION
+#ifdef OPENBLAS_QUAD_PRECISION
typedef struct {
unsigned long x[2];
} xdouble;
-#elif defined EXPRECISION
+#elif defined OPENBLAS_EXPRECISION
#define xdouble long double
#else
#define xdouble double
#endif
-#if defined(OS_WINDOWS) && defined(__64BIT__)
+#if defined(OPENBLAS_OS_WINDOWS) && defined(OPENBLAS___64BIT__)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
@@ -34,7 +34,7 @@ typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
-#ifdef USE64BITINT
+#ifdef OPENBLAS_USE64BITINT
typedef BLASLONG blasint;
#else
typedef int blasint;