summaryrefslogtreecommitdiff
path: root/driver
diff options
context:
space:
mode:
Diffstat (limited to 'driver')
-rw-r--r--driver/level2/Makefile2130
-rw-r--r--driver/level2/gbmv_k.c4
-rw-r--r--driver/level2/gbmv_thread.c32
-rw-r--r--driver/level2/gemv_thread.c22
-rw-r--r--driver/level2/ger_thread.c28
-rw-r--r--driver/level2/sbmv_k.c6
-rw-r--r--driver/level2/sbmv_thread.c80
-rw-r--r--driver/level2/spmv_k.c2
-rw-r--r--driver/level2/spmv_thread.c78
-rw-r--r--driver/level2/spr2_k.c2
-rw-r--r--driver/level2/spr2_thread.c64
-rw-r--r--driver/level2/spr_k.c2
-rw-r--r--driver/level2/spr_thread.c56
-rw-r--r--driver/level2/symv_thread.c76
-rw-r--r--driver/level2/syr2_k.c2
-rw-r--r--driver/level2/syr2_thread.c62
-rw-r--r--driver/level2/syr_k.c2
-rw-r--r--driver/level2/syr_thread.c56
-rw-r--r--driver/level2/tbmv_L.c8
-rw-r--r--driver/level2/tbmv_U.c6
-rw-r--r--driver/level2/tbmv_thread.c96
-rw-r--r--driver/level2/tbsv_L.c6
-rw-r--r--driver/level2/tbsv_U.c8
-rw-r--r--driver/level2/tpmv_L.c6
-rw-r--r--driver/level2/tpmv_U.c6
-rw-r--r--driver/level2/tpmv_thread.c88
-rw-r--r--driver/level2/tpsv_L.c8
-rw-r--r--driver/level2/tpsv_U.c6
-rw-r--r--driver/level2/trmv_L.c10
-rw-r--r--driver/level2/trmv_U.c8
-rw-r--r--driver/level2/trmv_thread.c98
-rw-r--r--driver/level2/trsv_L.c14
-rw-r--r--driver/level2/trsv_U.c10
-rw-r--r--driver/level2/zgbmv_k.c2
-rw-r--r--driver/level2/zhbmv_k.c18
-rw-r--r--driver/level2/zher2_k.c6
-rw-r--r--driver/level2/zhpmv_k.c32
-rw-r--r--driver/level2/zhpr2_k.c6
-rw-r--r--driver/level2/zsbmv_k.c12
-rw-r--r--driver/level2/zspmv_k.c16
-rw-r--r--driver/level2/zspr2_k.c4
-rw-r--r--driver/level2/zspr_k.c2
-rw-r--r--driver/level2/zsyr2_k.c4
-rw-r--r--driver/level2/zsyr_k.c2
-rw-r--r--driver/level2/ztbmv_L.c6
-rw-r--r--driver/level2/ztbmv_U.c2
-rw-r--r--driver/level2/ztbsv_L.c10
-rw-r--r--driver/level2/ztbsv_U.c12
-rw-r--r--driver/level2/ztpmv_L.c8
-rw-r--r--driver/level2/ztpmv_U.c4
-rw-r--r--driver/level2/ztpsv_L.c12
-rw-r--r--driver/level2/ztpsv_U.c14
-rw-r--r--driver/level2/ztrmv_L.c2
-rw-r--r--driver/level2/ztrmv_U.c6
-rw-r--r--driver/level2/ztrsv_L.c8
-rw-r--r--driver/level2/ztrsv_U.c6
-rw-r--r--driver/level3/Makefile216
-rw-r--r--driver/level3/gemm3m_level3.c130
-rw-r--r--driver/level3/gemm_thread_m.c8
-rw-r--r--driver/level3/gemm_thread_mn.c12
-rw-r--r--driver/level3/gemm_thread_n.c10
-rw-r--r--driver/level3/gemm_thread_variable.c8
-rw-r--r--driver/level3/level3.c48
-rw-r--r--driver/level3/level3_gemm3m_thread.c280
-rw-r--r--driver/level3/level3_syr2k.c114
-rw-r--r--driver/level3/level3_syrk.c202
-rw-r--r--driver/level3/level3_syrk_threaded.c142
-rw-r--r--driver/level3/level3_thread.c132
-rw-r--r--driver/level3/syr2k_k.c2
-rw-r--r--driver/level3/syr2k_kernel.c42
-rw-r--r--driver/level3/syrk_k.c2
-rw-r--r--driver/level3/syrk_kernel.c24
-rw-r--r--driver/level3/syrk_thread.c52
-rw-r--r--driver/level3/trmm_L.c94
-rw-r--r--driver/level3/trmm_R.c102
-rw-r--r--driver/level3/trsm_L.c40
-rw-r--r--driver/level3/trsm_R.c104
-rw-r--r--driver/level3/zher2k_k.c2
-rw-r--r--driver/level3/zher2k_kernel.c38
-rw-r--r--driver/level3/zherk_k.c2
-rw-r--r--driver/level3/zherk_kernel.c24
-rw-r--r--driver/level3/zsyrk_beta.c2
-rw-r--r--driver/mapper/mapper.c40
-rw-r--r--driver/others/Makefile12
-rw-r--r--driver/others/blas_l1_thread.c16
-rw-r--r--driver/others/blas_server.c216
-rw-r--r--driver/others/blas_server_omp.c36
-rw-r--r--driver/others/blas_server_win32.c122
-rw-r--r--driver/others/divtable.c34
-rw-r--r--driver/others/dynamic.c32
-rw-r--r--driver/others/init.c96
-rw-r--r--driver/others/lamc3.c2
-rw-r--r--driver/others/lamch.c2
-rw-r--r--driver/others/memory.c214
-rw-r--r--driver/others/memory_qalloc.c6
-rw-r--r--driver/others/openblas_get_config.c22
-rw-r--r--driver/others/openblas_get_parallel.c34
-rw-r--r--driver/others/openblas_set_num_threads.c22
-rw-r--r--driver/others/parameter.c42
-rw-r--r--driver/others/profile.c12
-rw-r--r--driver/others/xerbla.c4
101 files changed, 3036 insertions, 3036 deletions
diff --git a/driver/level2/Makefile b/driver/level2/Makefile
index 7043e52e1..79c4ca153 100644
--- a/driver/level2/Makefile
+++ b/driver/level2/Makefile
@@ -419,3200 +419,3200 @@ endif
all ::
-sgbmv_n.$(SUFFIX) sgbmv_n.$(PSUFFIX) : gbmv_k.c
+sgbmv_n.$(SUFFIX) sgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -UDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<
-sgbmv_t.$(SUFFIX) sgbmv_t.$(PSUFFIX) : gbmv_k.c
+sgbmv_t.$(SUFFIX) sgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -UDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<
-dgbmv_n.$(SUFFIX) dgbmv_n.$(PSUFFIX) : gbmv_k.c
+dgbmv_n.$(SUFFIX) dgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<
-dgbmv_t.$(SUFFIX) dgbmv_t.$(PSUFFIX) : gbmv_k.c
+dgbmv_t.$(SUFFIX) dgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<
-qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c
+qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DXDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<
-qgbmv_t.$(SUFFIX) qgbmv_t.$(PSUFFIX) : gbmv_k.c
+qgbmv_t.$(SUFFIX) qgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DXDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<
-cgbmv_n.$(SUFFIX) cgbmv_n.$(PSUFFIX) : zgbmv_k.c
+cgbmv_n.$(SUFFIX) cgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_t.$(SUFFIX) cgbmv_t.$(PSUFFIX) : zgbmv_k.c
+cgbmv_t.$(SUFFIX) cgbmv_t.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_r.$(SUFFIX) cgbmv_r.$(PSUFFIX) : zgbmv_k.c
+cgbmv_r.$(SUFFIX) cgbmv_r.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_c.$(SUFFIX) cgbmv_c.$(PSUFFIX) : zgbmv_k.c
+cgbmv_c.$(SUFFIX) cgbmv_c.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_o.$(SUFFIX) cgbmv_o.$(PSUFFIX) : zgbmv_k.c
+cgbmv_o.$(SUFFIX) cgbmv_o.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_u.$(SUFFIX) cgbmv_u.$(PSUFFIX) : zgbmv_k.c
+cgbmv_u.$(SUFFIX) cgbmv_u.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_s.$(SUFFIX) cgbmv_s.$(PSUFFIX) : zgbmv_k.c
+cgbmv_s.$(SUFFIX) cgbmv_s.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_d.$(SUFFIX) cgbmv_d.$(PSUFFIX) : zgbmv_k.c
+cgbmv_d.$(SUFFIX) cgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_n.$(SUFFIX) zgbmv_n.$(PSUFFIX) : zgbmv_k.c
+zgbmv_n.$(SUFFIX) zgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_t.$(SUFFIX) zgbmv_t.$(PSUFFIX) : zgbmv_k.c
+zgbmv_t.$(SUFFIX) zgbmv_t.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_r.$(SUFFIX) zgbmv_r.$(PSUFFIX) : zgbmv_k.c
+zgbmv_r.$(SUFFIX) zgbmv_r.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_c.$(SUFFIX) zgbmv_c.$(PSUFFIX) : zgbmv_k.c
+zgbmv_c.$(SUFFIX) zgbmv_c.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_o.$(SUFFIX) zgbmv_o.$(PSUFFIX) : zgbmv_k.c
+zgbmv_o.$(SUFFIX) zgbmv_o.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_u.$(SUFFIX) zgbmv_u.$(PSUFFIX) : zgbmv_k.c
+zgbmv_u.$(SUFFIX) zgbmv_u.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_s.$(SUFFIX) zgbmv_s.$(PSUFFIX) : zgbmv_k.c
+zgbmv_s.$(SUFFIX) zgbmv_s.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_d.$(SUFFIX) zgbmv_d.$(PSUFFIX) : zgbmv_k.c
+zgbmv_d.$(SUFFIX) zgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_n.$(SUFFIX) xgbmv_n.$(PSUFFIX) : zgbmv_k.c
+xgbmv_n.$(SUFFIX) xgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_t.$(SUFFIX) xgbmv_t.$(PSUFFIX) : zgbmv_k.c
+xgbmv_t.$(SUFFIX) xgbmv_t.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_r.$(SUFFIX) xgbmv_r.$(PSUFFIX) : zgbmv_k.c
+xgbmv_r.$(SUFFIX) xgbmv_r.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_c.$(SUFFIX) xgbmv_c.$(PSUFFIX) : zgbmv_k.c
+xgbmv_c.$(SUFFIX) xgbmv_c.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_o.$(SUFFIX) xgbmv_o.$(PSUFFIX) : zgbmv_k.c
+xgbmv_o.$(SUFFIX) xgbmv_o.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_u.$(SUFFIX) xgbmv_u.$(PSUFFIX) : zgbmv_k.c
+xgbmv_u.$(SUFFIX) xgbmv_u.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_s.$(SUFFIX) xgbmv_s.$(PSUFFIX) : zgbmv_k.c
+xgbmv_s.$(SUFFIX) xgbmv_s.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_d.$(SUFFIX) xgbmv_d.$(PSUFFIX) : zgbmv_k.c
+xgbmv_d.$(SUFFIX) xgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-sgbmv_thread_n.$(SUFFIX) sgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
+sgbmv_thread_n.$(SUFFIX) sgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -UDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<
-sgbmv_thread_t.$(SUFFIX) sgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
+sgbmv_thread_t.$(SUFFIX) sgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -UDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<
-dgbmv_thread_n.$(SUFFIX) dgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
+dgbmv_thread_n.$(SUFFIX) dgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<
-dgbmv_thread_t.$(SUFFIX) dgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
+dgbmv_thread_t.$(SUFFIX) dgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<
-qgbmv_thread_n.$(SUFFIX) qgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
+qgbmv_thread_n.$(SUFFIX) qgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DXDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<
-qgbmv_thread_t.$(SUFFIX) qgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
+qgbmv_thread_t.$(SUFFIX) qgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DXDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<
-cgbmv_thread_n.$(SUFFIX) cgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
+cgbmv_thread_n.$(SUFFIX) cgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_thread_t.$(SUFFIX) cgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
+cgbmv_thread_t.$(SUFFIX) cgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_thread_r.$(SUFFIX) cgbmv_thread_r.$(PSUFFIX) : gbmv_thread.c
+cgbmv_thread_r.$(SUFFIX) cgbmv_thread_r.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_thread_c.$(SUFFIX) cgbmv_thread_c.$(PSUFFIX) : gbmv_thread.c
+cgbmv_thread_c.$(SUFFIX) cgbmv_thread_c.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_thread_o.$(SUFFIX) cgbmv_thread_o.$(PSUFFIX) : gbmv_thread.c
+cgbmv_thread_o.$(SUFFIX) cgbmv_thread_o.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_thread_u.$(SUFFIX) cgbmv_thread_u.$(PSUFFIX) : gbmv_thread.c
+cgbmv_thread_u.$(SUFFIX) cgbmv_thread_u.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_thread_s.$(SUFFIX) cgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
+cgbmv_thread_s.$(SUFFIX) cgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-cgbmv_thread_d.$(SUFFIX) cgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
+cgbmv_thread_d.$(SUFFIX) cgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_thread_n.$(SUFFIX) zgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
+zgbmv_thread_n.$(SUFFIX) zgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_thread_t.$(SUFFIX) zgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
+zgbmv_thread_t.$(SUFFIX) zgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_thread_r.$(SUFFIX) zgbmv_thread_r.$(PSUFFIX) : gbmv_thread.c
+zgbmv_thread_r.$(SUFFIX) zgbmv_thread_r.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_thread_c.$(SUFFIX) zgbmv_thread_c.$(PSUFFIX) : gbmv_thread.c
+zgbmv_thread_c.$(SUFFIX) zgbmv_thread_c.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_thread_o.$(SUFFIX) zgbmv_thread_o.$(PSUFFIX) : gbmv_thread.c
+zgbmv_thread_o.$(SUFFIX) zgbmv_thread_o.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_thread_u.$(SUFFIX) zgbmv_thread_u.$(PSUFFIX) : gbmv_thread.c
+zgbmv_thread_u.$(SUFFIX) zgbmv_thread_u.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_thread_s.$(SUFFIX) zgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
+zgbmv_thread_s.$(SUFFIX) zgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-zgbmv_thread_d.$(SUFFIX) zgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
+zgbmv_thread_d.$(SUFFIX) zgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_thread_n.$(SUFFIX) xgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
+xgbmv_thread_n.$(SUFFIX) xgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_thread_t.$(SUFFIX) xgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
+xgbmv_thread_t.$(SUFFIX) xgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_thread_r.$(SUFFIX) xgbmv_thread_r.$(PSUFFIX) : gbmv_thread.c
+xgbmv_thread_r.$(SUFFIX) xgbmv_thread_r.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_thread_c.$(SUFFIX) xgbmv_thread_c.$(PSUFFIX) : gbmv_thread.c
+xgbmv_thread_c.$(SUFFIX) xgbmv_thread_c.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_thread_o.$(SUFFIX) xgbmv_thread_o.$(PSUFFIX) : gbmv_thread.c
+xgbmv_thread_o.$(SUFFIX) xgbmv_thread_o.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_thread_u.$(SUFFIX) xgbmv_thread_u.$(PSUFFIX) : gbmv_thread.c
+xgbmv_thread_u.$(SUFFIX) xgbmv_thread_u.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_thread_s.$(SUFFIX) xgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
+xgbmv_thread_s.$(SUFFIX) xgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-xgbmv_thread_d.$(SUFFIX) xgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
+xgbmv_thread_d.$(SUFFIX) xgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
-sgemv_thread_n.$(SUFFIX) sgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
+sgemv_thread_n.$(SUFFIX) sgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
-sgemv_thread_t.$(SUFFIX) sgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
+sgemv_thread_t.$(SUFFIX) sgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
-dgemv_thread_n.$(SUFFIX) dgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
+dgemv_thread_n.$(SUFFIX) dgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
-dgemv_thread_t.$(SUFFIX) dgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
+dgemv_thread_t.$(SUFFIX) dgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
-qgemv_thread_n.$(SUFFIX) qgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
+qgemv_thread_n.$(SUFFIX) qgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
-qgemv_thread_t.$(SUFFIX) qgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
+qgemv_thread_t.$(SUFFIX) qgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
-cgemv_thread_n.$(SUFFIX) cgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
+cgemv_thread_n.$(SUFFIX) cgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
-cgemv_thread_t.$(SUFFIX) cgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
+cgemv_thread_t.$(SUFFIX) cgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
-cgemv_thread_r.$(SUFFIX) cgemv_thread_r.$(PSUFFIX) : gemv_thread.c ../../common.h
+cgemv_thread_r.$(SUFFIX) cgemv_thread_r.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -DCONJ -UXCONJ $< -o $(@F)
-cgemv_thread_c.$(SUFFIX) cgemv_thread_c.$(PSUFFIX) : gemv_thread.c ../../common.h
+cgemv_thread_c.$(SUFFIX) cgemv_thread_c.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -UXCONJ $< -o $(@F)
-cgemv_thread_o.$(SUFFIX) cgemv_thread_o.$(PSUFFIX) : gemv_thread.c ../../common.h
+cgemv_thread_o.$(SUFFIX) cgemv_thread_o.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -DXCONJ $< -o $(@F)
-cgemv_thread_u.$(SUFFIX) cgemv_thread_u.$(PSUFFIX) : gemv_thread.c ../../common.h
+cgemv_thread_u.$(SUFFIX) cgemv_thread_u.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -UCONJ -DXCONJ $< -o $(@F)
-cgemv_thread_s.$(SUFFIX) cgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.h
+cgemv_thread_s.$(SUFFIX) cgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -DCONJ -DXCONJ $< -o $(@F)
-cgemv_thread_d.$(SUFFIX) cgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
+cgemv_thread_d.$(SUFFIX) cgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F)
-zgemv_thread_n.$(SUFFIX) zgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
+zgemv_thread_n.$(SUFFIX) zgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
-zgemv_thread_t.$(SUFFIX) zgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
+zgemv_thread_t.$(SUFFIX) zgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
-zgemv_thread_r.$(SUFFIX) zgemv_thread_r.$(PSUFFIX) : gemv_thread.c ../../common.h
+zgemv_thread_r.$(SUFFIX) zgemv_thread_r.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -DCONJ -UXCONJ $< -o $(@F)
-zgemv_thread_c.$(SUFFIX) zgemv_thread_c.$(PSUFFIX) : gemv_thread.c ../../common.h
+zgemv_thread_c.$(SUFFIX) zgemv_thread_c.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -UXCONJ $< -o $(@F)
-zgemv_thread_o.$(SUFFIX) zgemv_thread_o.$(PSUFFIX) : gemv_thread.c ../../common.h
+zgemv_thread_o.$(SUFFIX) zgemv_thread_o.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -DXCONJ $< -o $(@F)
-zgemv_thread_u.$(SUFFIX) zgemv_thread_u.$(PSUFFIX) : gemv_thread.c ../../common.h
+zgemv_thread_u.$(SUFFIX) zgemv_thread_u.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -UCONJ -DXCONJ $< -o $(@F)
-zgemv_thread_s.$(SUFFIX) zgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.h
+zgemv_thread_s.$(SUFFIX) zgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -DCONJ -DXCONJ $< -o $(@F)
-zgemv_thread_d.$(SUFFIX) zgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
+zgemv_thread_d.$(SUFFIX) zgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F)
-xgemv_thread_n.$(SUFFIX) xgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
+xgemv_thread_n.$(SUFFIX) xgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
-xgemv_thread_t.$(SUFFIX) xgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
+xgemv_thread_t.$(SUFFIX) xgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
-xgemv_thread_r.$(SUFFIX) xgemv_thread_r.$(PSUFFIX) : gemv_thread.c ../../common.h
+xgemv_thread_r.$(SUFFIX) xgemv_thread_r.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -DCONJ -UXCONJ $< -o $(@F)
-xgemv_thread_c.$(SUFFIX) xgemv_thread_c.$(PSUFFIX) : gemv_thread.c ../../common.h
+xgemv_thread_c.$(SUFFIX) xgemv_thread_c.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -UXCONJ $< -o $(@F)
-xgemv_thread_o.$(SUFFIX) xgemv_thread_o.$(PSUFFIX) : gemv_thread.c ../../common.h
+xgemv_thread_o.$(SUFFIX) xgemv_thread_o.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -DXCONJ $< -o $(@F)
-xgemv_thread_u.$(SUFFIX) xgemv_thread_u.$(PSUFFIX) : gemv_thread.c ../../common.h
+xgemv_thread_u.$(SUFFIX) xgemv_thread_u.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -DXCONJ $< -o $(@F)
-xgemv_thread_s.$(SUFFIX) xgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.h
+xgemv_thread_s.$(SUFFIX) xgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -DCONJ -DXCONJ $< -o $(@F)
-xgemv_thread_d.$(SUFFIX) xgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
+xgemv_thread_d.$(SUFFIX) xgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F)
-sger_thread.$(SUFFIX) sger_thread.$(PSUFFIX) : ger_thread.c ../../common.h
+sger_thread.$(SUFFIX) sger_thread.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UCONJ -UXCONJ $< -o $(@F)
-dger_thread.$(SUFFIX) dger_thread.$(PSUFFIX) : ger_thread.c ../../common.h
+dger_thread.$(SUFFIX) dger_thread.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UCONJ -UXCONJ $< -o $(@F)
-qger_thread.$(SUFFIX) qger_thread.$(PSUFFIX) : ger_thread.c ../../common.h
+qger_thread.$(SUFFIX) qger_thread.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UCONJ -UXCONJ $< -o $(@F)
-cger_thread_U.$(SUFFIX) cger_thread_U.$(PSUFFIX) : ger_thread.c ../../common.h
+cger_thread_U.$(SUFFIX) cger_thread_U.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UCONJ -UXCONJ $< -o $(@F)
-cger_thread_C.$(SUFFIX) cger_thread_C.$(PSUFFIX) : ger_thread.c ../../common.h
+cger_thread_C.$(SUFFIX) cger_thread_C.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DCONJ -UXCONJ $< -o $(@F)
-cger_thread_V.$(SUFFIX) cger_thread_V.$(PSUFFIX) : ger_thread.c ../../common.h
+cger_thread_V.$(SUFFIX) cger_thread_V.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UCONJ -DXCONJ $< -o $(@F)
-cger_thread_D.$(SUFFIX) cger_thread_D.$(PSUFFIX) : ger_thread.c ../../common.h
+cger_thread_D.$(SUFFIX) cger_thread_D.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DCONJ -DXCONJ $< -o $(@F)
-zger_thread_U.$(SUFFIX) zger_thread_U.$(PSUFFIX) : ger_thread.c ../../common.h
+zger_thread_U.$(SUFFIX) zger_thread_U.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UCONJ -UXCONJ $< -o $(@F)
-zger_thread_C.$(SUFFIX) zger_thread_C.$(PSUFFIX) : ger_thread.c ../../common.h
+zger_thread_C.$(SUFFIX) zger_thread_C.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DCONJ -UXCONJ $< -o $(@F)
-zger_thread_V.$(SUFFIX) zger_thread_V.$(PSUFFIX) : ger_thread.c ../../common.h
+zger_thread_V.$(SUFFIX) zger_thread_V.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UCONJ -DXCONJ $< -o $(@F)
-zger_thread_D.$(SUFFIX) zger_thread_D.$(PSUFFIX) : ger_thread.c ../../common.h
+zger_thread_D.$(SUFFIX) zger_thread_D.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DCONJ -DXCONJ $< -o $(@F)
-xger_thread_U.$(SUFFIX) xger_thread_U.$(PSUFFIX) : ger_thread.c ../../common.h
+xger_thread_U.$(SUFFIX) xger_thread_U.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UCONJ -UXCONJ $< -o $(@F)
-xger_thread_C.$(SUFFIX) xger_thread_C.$(PSUFFIX) : ger_thread.c ../../common.h
+xger_thread_C.$(SUFFIX) xger_thread_C.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DCONJ -UXCONJ $< -o $(@F)
-xger_thread_V.$(SUFFIX) xger_thread_V.$(PSUFFIX) : ger_thread.c ../../common.h
+xger_thread_V.$(SUFFIX) xger_thread_V.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UCONJ -DXCONJ $< -o $(@F)
-xger_thread_D.$(SUFFIX) xger_thread_D.$(PSUFFIX) : ger_thread.c ../../common.h
+xger_thread_D.$(SUFFIX) xger_thread_D.$(PSUFFIX) : ger_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DCONJ -DXCONJ $< -o $(@F)
-ssymv_thread_U.$(SUFFIX) ssymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+ssymv_thread_U.$(SUFFIX) ssymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-ssymv_thread_L.$(SUFFIX) ssymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+ssymv_thread_L.$(SUFFIX) ssymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dsymv_thread_U.$(SUFFIX) dsymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+dsymv_thread_U.$(SUFFIX) dsymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dsymv_thread_L.$(SUFFIX) dsymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+dsymv_thread_L.$(SUFFIX) dsymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qsymv_thread_U.$(SUFFIX) qsymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+qsymv_thread_U.$(SUFFIX) qsymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qsymv_thread_L.$(SUFFIX) qsymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+qsymv_thread_L.$(SUFFIX) qsymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-csymv_thread_U.$(SUFFIX) csymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+csymv_thread_U.$(SUFFIX) csymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-csymv_thread_L.$(SUFFIX) csymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+csymv_thread_L.$(SUFFIX) csymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zsymv_thread_U.$(SUFFIX) zsymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+zsymv_thread_U.$(SUFFIX) zsymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zsymv_thread_L.$(SUFFIX) zsymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+zsymv_thread_L.$(SUFFIX) zsymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xsymv_thread_U.$(SUFFIX) xsymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+xsymv_thread_U.$(SUFFIX) xsymv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xsymv_thread_L.$(SUFFIX) xsymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+xsymv_thread_L.$(SUFFIX) xsymv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-chemv_thread_U.$(SUFFIX) chemv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+chemv_thread_U.$(SUFFIX) chemv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $(@F)
-chemv_thread_L.$(SUFFIX) chemv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+chemv_thread_L.$(SUFFIX) chemv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV $< -o $(@F)
-chemv_thread_V.$(SUFFIX) chemv_thread_V.$(PSUFFIX) : symv_thread.c ../../param.h
+chemv_thread_V.$(SUFFIX) chemv_thread_V.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-chemv_thread_M.$(SUFFIX) chemv_thread_M.$(PSUFFIX) : symv_thread.c ../../param.h
+chemv_thread_M.$(SUFFIX) chemv_thread_M.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-zhemv_thread_U.$(SUFFIX) zhemv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+zhemv_thread_U.$(SUFFIX) zhemv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $(@F)
-zhemv_thread_L.$(SUFFIX) zhemv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+zhemv_thread_L.$(SUFFIX) zhemv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV $< -o $(@F)
-zhemv_thread_V.$(SUFFIX) zhemv_thread_V.$(PSUFFIX) : symv_thread.c ../../param.h
+zhemv_thread_V.$(SUFFIX) zhemv_thread_V.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-zhemv_thread_M.$(SUFFIX) zhemv_thread_M.$(PSUFFIX) : symv_thread.c ../../param.h
+zhemv_thread_M.$(SUFFIX) zhemv_thread_M.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-xhemv_thread_U.$(SUFFIX) xhemv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
+xhemv_thread_U.$(SUFFIX) xhemv_thread_U.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $(@F)
-xhemv_thread_L.$(SUFFIX) xhemv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
+xhemv_thread_L.$(SUFFIX) xhemv_thread_L.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV $< -o $(@F)
-xhemv_thread_V.$(SUFFIX) xhemv_thread_V.$(PSUFFIX) : symv_thread.c ../../param.h
+xhemv_thread_V.$(SUFFIX) xhemv_thread_V.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-xhemv_thread_M.$(SUFFIX) xhemv_thread_M.$(PSUFFIX) : symv_thread.c ../../param.h
+xhemv_thread_M.$(SUFFIX) xhemv_thread_M.$(PSUFFIX) : symv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-ssyr_thread_U.$(SUFFIX) ssyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+ssyr_thread_U.$(SUFFIX) ssyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-ssyr_thread_L.$(SUFFIX) ssyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+ssyr_thread_L.$(SUFFIX) ssyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dsyr_thread_U.$(SUFFIX) dsyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+dsyr_thread_U.$(SUFFIX) dsyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dsyr_thread_L.$(SUFFIX) dsyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+dsyr_thread_L.$(SUFFIX) dsyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qsyr_thread_U.$(SUFFIX) qsyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+qsyr_thread_U.$(SUFFIX) qsyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qsyr_thread_L.$(SUFFIX) qsyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+qsyr_thread_L.$(SUFFIX) qsyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-csyr_thread_U.$(SUFFIX) csyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+csyr_thread_U.$(SUFFIX) csyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-csyr_thread_L.$(SUFFIX) csyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+csyr_thread_L.$(SUFFIX) csyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zsyr_thread_U.$(SUFFIX) zsyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+zsyr_thread_U.$(SUFFIX) zsyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zsyr_thread_L.$(SUFFIX) zsyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+zsyr_thread_L.$(SUFFIX) zsyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xsyr_thread_U.$(SUFFIX) xsyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+xsyr_thread_U.$(SUFFIX) xsyr_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xsyr_thread_L.$(SUFFIX) xsyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+xsyr_thread_L.$(SUFFIX) xsyr_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-cher_thread_U.$(SUFFIX) cher_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+cher_thread_U.$(SUFFIX) cher_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHER $< -o $(@F)
-cher_thread_L.$(SUFFIX) cher_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+cher_thread_L.$(SUFFIX) cher_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHER $< -o $(@F)
-cher_thread_V.$(SUFFIX) cher_thread_V.$(PSUFFIX) : syr_thread.c ../../param.h
+cher_thread_V.$(SUFFIX) cher_thread_V.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHERREV $< -o $(@F)
-cher_thread_M.$(SUFFIX) cher_thread_M.$(PSUFFIX) : syr_thread.c ../../param.h
+cher_thread_M.$(SUFFIX) cher_thread_M.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHERREV $< -o $(@F)
-zher_thread_U.$(SUFFIX) zher_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+zher_thread_U.$(SUFFIX) zher_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHER $< -o $(@F)
-zher_thread_L.$(SUFFIX) zher_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+zher_thread_L.$(SUFFIX) zher_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHER $< -o $(@F)
-zher_thread_V.$(SUFFIX) zher_thread_V.$(PSUFFIX) : syr_thread.c ../../param.h
+zher_thread_V.$(SUFFIX) zher_thread_V.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHERREV $< -o $(@F)
-zher_thread_M.$(SUFFIX) zher_thread_M.$(PSUFFIX) : syr_thread.c ../../param.h
+zher_thread_M.$(SUFFIX) zher_thread_M.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHERREV $< -o $(@F)
-xher_thread_U.$(SUFFIX) xher_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
+xher_thread_U.$(SUFFIX) xher_thread_U.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHER $< -o $(@F)
-xher_thread_L.$(SUFFIX) xher_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
+xher_thread_L.$(SUFFIX) xher_thread_L.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHER $< -o $(@F)
-xher_thread_V.$(SUFFIX) xher_thread_V.$(PSUFFIX) : syr_thread.c ../../param.h
+xher_thread_V.$(SUFFIX) xher_thread_V.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHERREV $< -o $(@F)
-xher_thread_M.$(SUFFIX) xher_thread_M.$(PSUFFIX) : syr_thread.c ../../param.h
+xher_thread_M.$(SUFFIX) xher_thread_M.$(PSUFFIX) : syr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHERREV $< -o $(@F)
-ssyr2_thread_U.$(SUFFIX) ssyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+ssyr2_thread_U.$(SUFFIX) ssyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-ssyr2_thread_L.$(SUFFIX) ssyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+ssyr2_thread_L.$(SUFFIX) ssyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dsyr2_thread_U.$(SUFFIX) dsyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+dsyr2_thread_U.$(SUFFIX) dsyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dsyr2_thread_L.$(SUFFIX) dsyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+dsyr2_thread_L.$(SUFFIX) dsyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qsyr2_thread_U.$(SUFFIX) qsyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+qsyr2_thread_U.$(SUFFIX) qsyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qsyr2_thread_L.$(SUFFIX) qsyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+qsyr2_thread_L.$(SUFFIX) qsyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-csyr2_thread_U.$(SUFFIX) csyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+csyr2_thread_U.$(SUFFIX) csyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-csyr2_thread_L.$(SUFFIX) csyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+csyr2_thread_L.$(SUFFIX) csyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zsyr2_thread_U.$(SUFFIX) zsyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+zsyr2_thread_U.$(SUFFIX) zsyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zsyr2_thread_L.$(SUFFIX) zsyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+zsyr2_thread_L.$(SUFFIX) zsyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xsyr2_thread_U.$(SUFFIX) xsyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+xsyr2_thread_U.$(SUFFIX) xsyr2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xsyr2_thread_L.$(SUFFIX) xsyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+xsyr2_thread_L.$(SUFFIX) xsyr2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-cher2_thread_U.$(SUFFIX) cher2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+cher2_thread_U.$(SUFFIX) cher2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHER $< -o $(@F)
-cher2_thread_L.$(SUFFIX) cher2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+cher2_thread_L.$(SUFFIX) cher2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHER $< -o $(@F)
-cher2_thread_V.$(SUFFIX) cher2_thread_V.$(PSUFFIX) : syr2_thread.c ../../param.h
+cher2_thread_V.$(SUFFIX) cher2_thread_V.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHERREV $< -o $(@F)
-cher2_thread_M.$(SUFFIX) cher2_thread_M.$(PSUFFIX) : syr2_thread.c ../../param.h
+cher2_thread_M.$(SUFFIX) cher2_thread_M.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHERREV $< -o $(@F)
-zher2_thread_U.$(SUFFIX) zher2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+zher2_thread_U.$(SUFFIX) zher2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHER $< -o $(@F)
-zher2_thread_L.$(SUFFIX) zher2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+zher2_thread_L.$(SUFFIX) zher2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHER $< -o $(@F)
-zher2_thread_V.$(SUFFIX) zher2_thread_V.$(PSUFFIX) : syr2_thread.c ../../param.h
+zher2_thread_V.$(SUFFIX) zher2_thread_V.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHERREV $< -o $(@F)
-zher2_thread_M.$(SUFFIX) zher2_thread_M.$(PSUFFIX) : syr2_thread.c ../../param.h
+zher2_thread_M.$(SUFFIX) zher2_thread_M.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHERREV $< -o $(@F)
-xher2_thread_U.$(SUFFIX) xher2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
+xher2_thread_U.$(SUFFIX) xher2_thread_U.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHER $< -o $(@F)
-xher2_thread_L.$(SUFFIX) xher2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
+xher2_thread_L.$(SUFFIX) xher2_thread_L.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHER $< -o $(@F)
-xher2_thread_V.$(SUFFIX) xher2_thread_V.$(PSUFFIX) : syr2_thread.c ../../param.h
+xher2_thread_V.$(SUFFIX) xher2_thread_V.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHERREV $< -o $(@F)
-xher2_thread_M.$(SUFFIX) xher2_thread_M.$(PSUFFIX) : syr2_thread.c ../../param.h
+xher2_thread_M.$(SUFFIX) xher2_thread_M.$(PSUFFIX) : syr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHERREV $< -o $(@F)
-chbmv_U.$(SUFFIX) chbmv_U.$(PSUFFIX) : zhbmv_k.c ../../param.h
+chbmv_U.$(SUFFIX) chbmv_U.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-chbmv_L.$(SUFFIX) chbmv_L.$(PSUFFIX) : zhbmv_k.c ../../param.h
+chbmv_L.$(SUFFIX) chbmv_L.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-chbmv_V.$(SUFFIX) chbmv_V.$(PSUFFIX) : zhbmv_k.c ../../param.h
+chbmv_V.$(SUFFIX) chbmv_V.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-chbmv_M.$(SUFFIX) chbmv_M.$(PSUFFIX) : zhbmv_k.c ../../param.h
+chbmv_M.$(SUFFIX) chbmv_M.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-zhbmv_U.$(SUFFIX) zhbmv_U.$(PSUFFIX) : zhbmv_k.c ../../param.h
+zhbmv_U.$(SUFFIX) zhbmv_U.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zhbmv_L.$(SUFFIX) zhbmv_L.$(PSUFFIX) : zhbmv_k.c ../../param.h
+zhbmv_L.$(SUFFIX) zhbmv_L.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-zhbmv_V.$(SUFFIX) zhbmv_V.$(PSUFFIX) : zhbmv_k.c ../../param.h
+zhbmv_V.$(SUFFIX) zhbmv_V.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-zhbmv_M.$(SUFFIX) zhbmv_M.$(PSUFFIX) : zhbmv_k.c ../../param.h
+zhbmv_M.$(SUFFIX) zhbmv_M.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-xhbmv_U.$(SUFFIX) xhbmv_U.$(PSUFFIX) : zhbmv_k.c ../../param.h
+xhbmv_U.$(SUFFIX) xhbmv_U.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xhbmv_L.$(SUFFIX) xhbmv_L.$(PSUFFIX) : zhbmv_k.c ../../param.h
+xhbmv_L.$(SUFFIX) xhbmv_L.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-xhbmv_V.$(SUFFIX) xhbmv_V.$(PSUFFIX) : zhbmv_k.c ../../param.h
+xhbmv_V.$(SUFFIX) xhbmv_V.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-xhbmv_M.$(SUFFIX) xhbmv_M.$(PSUFFIX) : zhbmv_k.c ../../param.h
+xhbmv_M.$(SUFFIX) xhbmv_M.$(PSUFFIX) : zhbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-chbmv_thread_U.$(SUFFIX) chbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+chbmv_thread_U.$(SUFFIX) chbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $(@F)
-chbmv_thread_L.$(SUFFIX) chbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+chbmv_thread_L.$(SUFFIX) chbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV $< -o $(@F)
-chbmv_thread_V.$(SUFFIX) chbmv_thread_V.$(PSUFFIX) : sbmv_thread.c ../../param.h
+chbmv_thread_V.$(SUFFIX) chbmv_thread_V.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-chbmv_thread_M.$(SUFFIX) chbmv_thread_M.$(PSUFFIX) : sbmv_thread.c ../../param.h
+chbmv_thread_M.$(SUFFIX) chbmv_thread_M.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-zhbmv_thread_U.$(SUFFIX) zhbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+zhbmv_thread_U.$(SUFFIX) zhbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $(@F)
-zhbmv_thread_L.$(SUFFIX) zhbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+zhbmv_thread_L.$(SUFFIX) zhbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV $< -o $(@F)
-zhbmv_thread_V.$(SUFFIX) zhbmv_thread_V.$(PSUFFIX) : sbmv_thread.c ../../param.h
+zhbmv_thread_V.$(SUFFIX) zhbmv_thread_V.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-zhbmv_thread_M.$(SUFFIX) zhbmv_thread_M.$(PSUFFIX) : sbmv_thread.c ../../param.h
+zhbmv_thread_M.$(SUFFIX) zhbmv_thread_M.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-xhbmv_thread_U.$(SUFFIX) xhbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+xhbmv_thread_U.$(SUFFIX) xhbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $(@F)
-xhbmv_thread_L.$(SUFFIX) xhbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+xhbmv_thread_L.$(SUFFIX) xhbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV $< -o $(@F)
-xhbmv_thread_V.$(SUFFIX) xhbmv_thread_V.$(PSUFFIX) : sbmv_thread.c ../../param.h
+xhbmv_thread_V.$(SUFFIX) xhbmv_thread_V.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-xhbmv_thread_M.$(SUFFIX) xhbmv_thread_M.$(PSUFFIX) : sbmv_thread.c ../../param.h
+xhbmv_thread_M.$(SUFFIX) xhbmv_thread_M.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-cher_U.$(SUFFIX) cher_U.$(PSUFFIX) : zher_k.c ../../common.h
+cher_U.$(SUFFIX) cher_U.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -ULOWER $< -o $(@F)
-cher_L.$(SUFFIX) cher_L.$(PSUFFIX) : zher_k.c ../../common.h
+cher_L.$(SUFFIX) cher_L.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -DLOWER $< -o $(@F)
-cher_V.$(SUFFIX) cher_V.$(PSUFFIX) : zher_k.c ../../common.h
+cher_V.$(SUFFIX) cher_V.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-cher_M.$(SUFFIX) cher_M.$(PSUFFIX) : zher_k.c ../../common.h
+cher_M.$(SUFFIX) cher_M.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-zher_U.$(SUFFIX) zher_U.$(PSUFFIX) : zher_k.c ../../common.h
+zher_U.$(SUFFIX) zher_U.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -ULOWER $< -o $(@F)
-zher_L.$(SUFFIX) zher_L.$(PSUFFIX) : zher_k.c ../../common.h
+zher_L.$(SUFFIX) zher_L.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -DLOWER $< -o $(@F)
-zher_V.$(SUFFIX) zher_V.$(PSUFFIX) : zher_k.c ../../common.h
+zher_V.$(SUFFIX) zher_V.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-zher_M.$(SUFFIX) zher_M.$(PSUFFIX) : zher_k.c ../../common.h
+zher_M.$(SUFFIX) zher_M.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-xher_U.$(SUFFIX) xher_U.$(PSUFFIX) : zher_k.c ../../common.h
+xher_U.$(SUFFIX) xher_U.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -ULOWER $< -o $(@F)
-xher_L.$(SUFFIX) xher_L.$(PSUFFIX) : zher_k.c ../../common.h
+xher_L.$(SUFFIX) xher_L.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DLOWER $< -o $(@F)
-xher_V.$(SUFFIX) xher_V.$(PSUFFIX) : zher_k.c ../../common.h
+xher_V.$(SUFFIX) xher_V.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-xher_M.$(SUFFIX) xher_M.$(PSUFFIX) : zher_k.c ../../common.h
+xher_M.$(SUFFIX) xher_M.$(PSUFFIX) : zher_k.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-cher2_U.$(SUFFIX) cher2_U.$(PSUFFIX) : zher2_k.c ../../param.h
+cher2_U.$(SUFFIX) cher2_U.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER $< -o $(@F)
-cher2_L.$(SUFFIX) cher2_L.$(PSUFFIX) : zher2_k.c ../../param.h
+cher2_L.$(SUFFIX) cher2_L.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER $< -o $(@F)
-cher2_V.$(SUFFIX) cher2_V.$(PSUFFIX) : zher2_k.c ../../param.h
+cher2_V.$(SUFFIX) cher2_V.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DHEMVREV $< -o $(@F)
-cher2_M.$(SUFFIX) cher2_M.$(PSUFFIX) : zher2_k.c ../../param.h
+cher2_M.$(SUFFIX) cher2_M.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -DHEMVREV $< -o $(@F)
-zher2_U.$(SUFFIX) zher2_U.$(PSUFFIX) : zher2_k.c ../../param.h
+zher2_U.$(SUFFIX) zher2_U.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER $< -o $(@F)
-zher2_L.$(SUFFIX) zher2_L.$(PSUFFIX) : zher2_k.c ../../param.h
+zher2_L.$(SUFFIX) zher2_L.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER $< -o $(@F)
-zher2_V.$(SUFFIX) zher2_V.$(PSUFFIX) : zher2_k.c ../../param.h
+zher2_V.$(SUFFIX) zher2_V.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DHEMVREV $< -o $(@F)
-zher2_M.$(SUFFIX) zher2_M.$(PSUFFIX) : zher2_k.c ../../param.h
+zher2_M.$(SUFFIX) zher2_M.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -DHEMVREV $< -o $(@F)
-xher2_U.$(SUFFIX) xher2_U.$(PSUFFIX) : zher2_k.c ../../param.h
+xher2_U.$(SUFFIX) xher2_U.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER $< -o $(@F)
-xher2_L.$(SUFFIX) xher2_L.$(PSUFFIX) : zher2_k.c ../../param.h
+xher2_L.$(SUFFIX) xher2_L.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER $< -o $(@F)
-xher2_V.$(SUFFIX) xher2_V.$(PSUFFIX) : zher2_k.c ../../param.h
+xher2_V.$(SUFFIX) xher2_V.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER $< -DHEMVREV -o $(@F)
-xher2_M.$(SUFFIX) xher2_M.$(PSUFFIX) : zher2_k.c ../../param.h
+xher2_M.$(SUFFIX) xher2_M.$(PSUFFIX) : zher2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER $< -DHEMVREV -o $(@F)
-chpmv_U.$(SUFFIX) chpmv_U.$(PSUFFIX) : zhpmv_k.c ../../param.h
+chpmv_U.$(SUFFIX) chpmv_U.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-chpmv_L.$(SUFFIX) chpmv_L.$(PSUFFIX) : zhpmv_k.c ../../param.h
+chpmv_L.$(SUFFIX) chpmv_L.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-chpmv_V.$(SUFFIX) chpmv_V.$(PSUFFIX) : zhpmv_k.c ../../param.h
+chpmv_V.$(SUFFIX) chpmv_V.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-chpmv_M.$(SUFFIX) chpmv_M.$(PSUFFIX) : zhpmv_k.c ../../param.h
+chpmv_M.$(SUFFIX) chpmv_M.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-zhpmv_U.$(SUFFIX) zhpmv_U.$(PSUFFIX) : zhpmv_k.c ../../param.h
+zhpmv_U.$(SUFFIX) zhpmv_U.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zhpmv_L.$(SUFFIX) zhpmv_L.$(PSUFFIX) : zhpmv_k.c ../../param.h
+zhpmv_L.$(SUFFIX) zhpmv_L.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-zhpmv_V.$(SUFFIX) zhpmv_V.$(PSUFFIX) : zhpmv_k.c ../../param.h
+zhpmv_V.$(SUFFIX) zhpmv_V.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-zhpmv_M.$(SUFFIX) zhpmv_M.$(PSUFFIX) : zhpmv_k.c ../../param.h
+zhpmv_M.$(SUFFIX) zhpmv_M.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-xhpmv_U.$(SUFFIX) xhpmv_U.$(PSUFFIX) : zhpmv_k.c ../../param.h
+xhpmv_U.$(SUFFIX) xhpmv_U.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xhpmv_L.$(SUFFIX) xhpmv_L.$(PSUFFIX) : zhpmv_k.c ../../param.h
+xhpmv_L.$(SUFFIX) xhpmv_L.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-xhpmv_V.$(SUFFIX) xhpmv_V.$(PSUFFIX) : zhpmv_k.c ../../param.h
+xhpmv_V.$(SUFFIX) xhpmv_V.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-xhpmv_M.$(SUFFIX) xhpmv_M.$(PSUFFIX) : zhpmv_k.c ../../param.h
+xhpmv_M.$(SUFFIX) xhpmv_M.$(PSUFFIX) : zhpmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-chpmv_thread_U.$(SUFFIX) chpmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+chpmv_thread_U.$(SUFFIX) chpmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $(@F)
-chpmv_thread_L.$(SUFFIX) chpmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+chpmv_thread_L.$(SUFFIX) chpmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV $< -o $(@F)
-chpmv_thread_V.$(SUFFIX) chpmv_thread_V.$(PSUFFIX) : spmv_thread.c ../../param.h
+chpmv_thread_V.$(SUFFIX) chpmv_thread_V.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-chpmv_thread_M.$(SUFFIX) chpmv_thread_M.$(PSUFFIX) : spmv_thread.c ../../param.h
+chpmv_thread_M.$(SUFFIX) chpmv_thread_M.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-zhpmv_thread_U.$(SUFFIX) zhpmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+zhpmv_thread_U.$(SUFFIX) zhpmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $(@F)
-zhpmv_thread_L.$(SUFFIX) zhpmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+zhpmv_thread_L.$(SUFFIX) zhpmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV $< -o $(@F)
-zhpmv_thread_V.$(SUFFIX) zhpmv_thread_V.$(PSUFFIX) : spmv_thread.c ../../param.h
+zhpmv_thread_V.$(SUFFIX) zhpmv_thread_V.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-zhpmv_thread_M.$(SUFFIX) zhpmv_thread_M.$(PSUFFIX) : spmv_thread.c ../../param.h
+zhpmv_thread_M.$(SUFFIX) zhpmv_thread_M.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-xhpmv_thread_U.$(SUFFIX) xhpmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+xhpmv_thread_U.$(SUFFIX) xhpmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $(@F)
-xhpmv_thread_L.$(SUFFIX) xhpmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+xhpmv_thread_L.$(SUFFIX) xhpmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV $< -o $(@F)
-xhpmv_thread_V.$(SUFFIX) xhpmv_thread_V.$(PSUFFIX) : spmv_thread.c ../../param.h
+xhpmv_thread_V.$(SUFFIX) xhpmv_thread_V.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-xhpmv_thread_M.$(SUFFIX) xhpmv_thread_M.$(PSUFFIX) : spmv_thread.c ../../param.h
+xhpmv_thread_M.$(SUFFIX) xhpmv_thread_M.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-chpr_U.$(SUFFIX) chpr_U.$(PSUFFIX) : zhpr_k.c ../../common.h
+chpr_U.$(SUFFIX) chpr_U.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -ULOWER $< -o $(@F)
-chpr_L.$(SUFFIX) chpr_L.$(PSUFFIX) : zhpr_k.c ../../common.h
+chpr_L.$(SUFFIX) chpr_L.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -DLOWER $< -o $(@F)
-chpr_V.$(SUFFIX) chpr_V.$(PSUFFIX) : zhpr_k.c ../../common.h
+chpr_V.$(SUFFIX) chpr_V.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-chpr_M.$(SUFFIX) chpr_M.$(PSUFFIX) : zhpr_k.c ../../common.h
+chpr_M.$(SUFFIX) chpr_M.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-zhpr_U.$(SUFFIX) zhpr_U.$(PSUFFIX) : zhpr_k.c ../../common.h
+zhpr_U.$(SUFFIX) zhpr_U.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -ULOWER $< -o $(@F)
-zhpr_L.$(SUFFIX) zhpr_L.$(PSUFFIX) : zhpr_k.c ../../common.h
+zhpr_L.$(SUFFIX) zhpr_L.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -DLOWER $< -o $(@F)
-zhpr_V.$(SUFFIX) zhpr_V.$(PSUFFIX) : zhpr_k.c ../../common.h
+zhpr_V.$(SUFFIX) zhpr_V.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-zhpr_M.$(SUFFIX) zhpr_M.$(PSUFFIX) : zhpr_k.c ../../common.h
+zhpr_M.$(SUFFIX) zhpr_M.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-xhpr_U.$(SUFFIX) xhpr_U.$(PSUFFIX) : zhpr_k.c ../../common.h
+xhpr_U.$(SUFFIX) xhpr_U.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -ULOWER $< -o $(@F)
-xhpr_L.$(SUFFIX) xhpr_L.$(PSUFFIX) : zhpr_k.c ../../common.h
+xhpr_L.$(SUFFIX) xhpr_L.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DLOWER $< -o $(@F)
-xhpr_V.$(SUFFIX) xhpr_V.$(PSUFFIX) : zhpr_k.c ../../common.h
+xhpr_V.$(SUFFIX) xhpr_V.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-xhpr_M.$(SUFFIX) xhpr_M.$(PSUFFIX) : zhpr_k.c ../../common.h
+xhpr_M.$(SUFFIX) xhpr_M.$(PSUFFIX) : zhpr_k.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-chpr_thread_U.$(SUFFIX) chpr_thread_U.$(PSUFFIX) : spr_thread.c ../../common.h
+chpr_thread_U.$(SUFFIX) chpr_thread_U.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -ULOWER -DHEMV $< -o $(@F)
-chpr_thread_L.$(SUFFIX) chpr_thread_L.$(PSUFFIX) : spr_thread.c ../../common.h
+chpr_thread_L.$(SUFFIX) chpr_thread_L.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -DLOWER -DHEMV $< -o $(@F)
-chpr_thread_V.$(SUFFIX) chpr_thread_V.$(PSUFFIX) : spr_thread.c ../../common.h
+chpr_thread_V.$(SUFFIX) chpr_thread_V.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-chpr_thread_M.$(SUFFIX) chpr_thread_M.$(PSUFFIX) : spr_thread.c ../../common.h
+chpr_thread_M.$(SUFFIX) chpr_thread_M.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-zhpr_thread_U.$(SUFFIX) zhpr_thread_U.$(PSUFFIX) : spr_thread.c ../../common.h
+zhpr_thread_U.$(SUFFIX) zhpr_thread_U.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -ULOWER -DHEMV $< -o $(@F)
-zhpr_thread_L.$(SUFFIX) zhpr_thread_L.$(PSUFFIX) : spr_thread.c ../../common.h
+zhpr_thread_L.$(SUFFIX) zhpr_thread_L.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -DLOWER -DHEMV $< -o $(@F)
-zhpr_thread_V.$(SUFFIX) zhpr_thread_V.$(PSUFFIX) : spr_thread.c ../../common.h
+zhpr_thread_V.$(SUFFIX) zhpr_thread_V.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-zhpr_thread_M.$(SUFFIX) zhpr_thread_M.$(PSUFFIX) : spr_thread.c ../../common.h
+zhpr_thread_M.$(SUFFIX) zhpr_thread_M.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-xhpr_thread_U.$(SUFFIX) xhpr_thread_U.$(PSUFFIX) : spr_thread.c ../../common.h
+xhpr_thread_U.$(SUFFIX) xhpr_thread_U.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -ULOWER -DHEMV $< -o $(@F)
-xhpr_thread_L.$(SUFFIX) xhpr_thread_L.$(PSUFFIX) : spr_thread.c ../../common.h
+xhpr_thread_L.$(SUFFIX) xhpr_thread_L.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DLOWER -DHEMV $< -o $(@F)
-xhpr_thread_V.$(SUFFIX) xhpr_thread_V.$(PSUFFIX) : spr_thread.c ../../common.h
+xhpr_thread_V.$(SUFFIX) xhpr_thread_V.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -ULOWER -DHEMVREV $< -o $(@F)
-xhpr_thread_M.$(SUFFIX) xhpr_thread_M.$(PSUFFIX) : spr_thread.c ../../common.h
+xhpr_thread_M.$(SUFFIX) xhpr_thread_M.$(PSUFFIX) : spr_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DLOWER -DHEMVREV $< -o $(@F)
-chpr2_U.$(SUFFIX) chpr2_U.$(PSUFFIX) : zhpr2_k.c ../../param.h
+chpr2_U.$(SUFFIX) chpr2_U.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER $< -o $(@F)
-chpr2_L.$(SUFFIX) chpr2_L.$(PSUFFIX) : zhpr2_k.c ../../param.h
+chpr2_L.$(SUFFIX) chpr2_L.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER $< -o $(@F)
-chpr2_V.$(SUFFIX) chpr2_V.$(PSUFFIX) : zhpr2_k.c ../../param.h
+chpr2_V.$(SUFFIX) chpr2_V.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DHEMVREV $< -o $(@F)
-chpr2_M.$(SUFFIX) chpr2_M.$(PSUFFIX) : zhpr2_k.c ../../param.h
+chpr2_M.$(SUFFIX) chpr2_M.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -DHEMVREV $< -o $(@F)
-zhpr2_U.$(SUFFIX) zhpr2_U.$(PSUFFIX) : zhpr2_k.c ../../param.h
+zhpr2_U.$(SUFFIX) zhpr2_U.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER $< -o $(@F)
-zhpr2_L.$(SUFFIX) zhpr2_L.$(PSUFFIX) : zhpr2_k.c ../../param.h
+zhpr2_L.$(SUFFIX) zhpr2_L.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER $< -o $(@F)
-zhpr2_V.$(SUFFIX) zhpr2_V.$(PSUFFIX) : zhpr2_k.c ../../param.h
+zhpr2_V.$(SUFFIX) zhpr2_V.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DHEMVREV $< -o $(@F)
-zhpr2_M.$(SUFFIX) zhpr2_M.$(PSUFFIX) : zhpr2_k.c ../../param.h
+zhpr2_M.$(SUFFIX) zhpr2_M.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -DHEMVREV $< -o $(@F)
-xhpr2_U.$(SUFFIX) xhpr2_U.$(PSUFFIX) : zhpr2_k.c ../../param.h
+xhpr2_U.$(SUFFIX) xhpr2_U.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER $< -o $(@F)
-xhpr2_L.$(SUFFIX) xhpr2_L.$(PSUFFIX) : zhpr2_k.c ../../param.h
+xhpr2_L.$(SUFFIX) xhpr2_L.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER $< -o $(@F)
-xhpr2_V.$(SUFFIX) xhpr2_V.$(PSUFFIX) : zhpr2_k.c ../../param.h
+xhpr2_V.$(SUFFIX) xhpr2_V.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DHEMVREV $< -o $(@F)
-xhpr2_M.$(SUFFIX) xhpr2_M.$(PSUFFIX) : zhpr2_k.c ../../param.h
+xhpr2_M.$(SUFFIX) xhpr2_M.$(PSUFFIX) : zhpr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER $< -DHEMVREV -o $(@F)
-chpr2_thread_U.$(SUFFIX) chpr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+chpr2_thread_U.$(SUFFIX) chpr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DHEMV $< -o $(@F)
-chpr2_thread_L.$(SUFFIX) chpr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+chpr2_thread_L.$(SUFFIX) chpr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -DHEMV $< -o $(@F)
-chpr2_thread_V.$(SUFFIX) chpr2_thread_V.$(PSUFFIX) : spr2_thread.c ../../param.h
+chpr2_thread_V.$(SUFFIX) chpr2_thread_V.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DHEMVREV $< -o $(@F)
-chpr2_thread_M.$(SUFFIX) chpr2_thread_M.$(PSUFFIX) : spr2_thread.c ../../param.h
+chpr2_thread_M.$(SUFFIX) chpr2_thread_M.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -DHEMVREV $< -o $(@F)
-zhpr2_thread_U.$(SUFFIX) zhpr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+zhpr2_thread_U.$(SUFFIX) zhpr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DHEMV $< -o $(@F)
-zhpr2_thread_L.$(SUFFIX) zhpr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+zhpr2_thread_L.$(SUFFIX) zhpr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -DHEMV $< -o $(@F)
-zhpr2_thread_V.$(SUFFIX) zhpr2_thread_V.$(PSUFFIX) : spr2_thread.c ../../param.h
+zhpr2_thread_V.$(SUFFIX) zhpr2_thread_V.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DHEMVREV $< -o $(@F)
-zhpr2_thread_M.$(SUFFIX) zhpr2_thread_M.$(PSUFFIX) : spr2_thread.c ../../param.h
+zhpr2_thread_M.$(SUFFIX) zhpr2_thread_M.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -DHEMVREV $< -o $(@F)
-xhpr2_thread_U.$(SUFFIX) xhpr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+xhpr2_thread_U.$(SUFFIX) xhpr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DHEMV $< -o $(@F)
-xhpr2_thread_L.$(SUFFIX) xhpr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+xhpr2_thread_L.$(SUFFIX) xhpr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -DHEMV $< -o $(@F)
-xhpr2_thread_V.$(SUFFIX) xhpr2_thread_V.$(PSUFFIX) : spr2_thread.c ../../param.h
+xhpr2_thread_V.$(SUFFIX) xhpr2_thread_V.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DHEMVREV $< -o $(@F)
-xhpr2_thread_M.$(SUFFIX) xhpr2_thread_M.$(PSUFFIX) : spr2_thread.c ../../param.h
+xhpr2_thread_M.$(SUFFIX) xhpr2_thread_M.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER $< -DHEMVREV -o $(@F)
-ssbmv_U.$(SUFFIX) ssbmv_U.$(PSUFFIX) : sbmv_k.c ../../param.h
+ssbmv_U.$(SUFFIX) ssbmv_U.$(PSUFFIX) : sbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-ssbmv_L.$(SUFFIX) ssbmv_L.$(PSUFFIX) : sbmv_k.c ../../param.h
+ssbmv_L.$(SUFFIX) ssbmv_L.$(PSUFFIX) : sbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dsbmv_U.$(SUFFIX) dsbmv_U.$(PSUFFIX) : sbmv_k.c ../../param.h
+dsbmv_U.$(SUFFIX) dsbmv_U.$(PSUFFIX) : sbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dsbmv_L.$(SUFFIX) dsbmv_L.$(PSUFFIX) : sbmv_k.c ../../param.h
+dsbmv_L.$(SUFFIX) dsbmv_L.$(PSUFFIX) : sbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qsbmv_U.$(SUFFIX) qsbmv_U.$(PSUFFIX) : sbmv_k.c ../../param.h
+qsbmv_U.$(SUFFIX) qsbmv_U.$(PSUFFIX) : sbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qsbmv_L.$(SUFFIX) qsbmv_L.$(PSUFFIX) : sbmv_k.c ../../param.h
+qsbmv_L.$(SUFFIX) qsbmv_L.$(PSUFFIX) : sbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-csbmv_U.$(SUFFIX) csbmv_U.$(PSUFFIX) : zsbmv_k.c ../../param.h
+csbmv_U.$(SUFFIX) csbmv_U.$(PSUFFIX) : zsbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-csbmv_L.$(SUFFIX) csbmv_L.$(PSUFFIX) : zsbmv_k.c ../../param.h
+csbmv_L.$(SUFFIX) csbmv_L.$(PSUFFIX) : zsbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zsbmv_U.$(SUFFIX) zsbmv_U.$(PSUFFIX) : zsbmv_k.c ../../param.h
+zsbmv_U.$(SUFFIX) zsbmv_U.$(PSUFFIX) : zsbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zsbmv_L.$(SUFFIX) zsbmv_L.$(PSUFFIX) : zsbmv_k.c ../../param.h
+zsbmv_L.$(SUFFIX) zsbmv_L.$(PSUFFIX) : zsbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xsbmv_U.$(SUFFIX) xsbmv_U.$(PSUFFIX) : zsbmv_k.c ../../param.h
+xsbmv_U.$(SUFFIX) xsbmv_U.$(PSUFFIX) : zsbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xsbmv_L.$(SUFFIX) xsbmv_L.$(PSUFFIX) : zsbmv_k.c ../../param.h
+xsbmv_L.$(SUFFIX) xsbmv_L.$(PSUFFIX) : zsbmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-ssbmv_thread_U.$(SUFFIX) ssbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+ssbmv_thread_U.$(SUFFIX) ssbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-ssbmv_thread_L.$(SUFFIX) ssbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+ssbmv_thread_L.$(SUFFIX) ssbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dsbmv_thread_U.$(SUFFIX) dsbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+dsbmv_thread_U.$(SUFFIX) dsbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dsbmv_thread_L.$(SUFFIX) dsbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+dsbmv_thread_L.$(SUFFIX) dsbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qsbmv_thread_U.$(SUFFIX) qsbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+qsbmv_thread_U.$(SUFFIX) qsbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qsbmv_thread_L.$(SUFFIX) qsbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+qsbmv_thread_L.$(SUFFIX) qsbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-csbmv_thread_U.$(SUFFIX) csbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+csbmv_thread_U.$(SUFFIX) csbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-csbmv_thread_L.$(SUFFIX) csbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+csbmv_thread_L.$(SUFFIX) csbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zsbmv_thread_U.$(SUFFIX) zsbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+zsbmv_thread_U.$(SUFFIX) zsbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zsbmv_thread_L.$(SUFFIX) zsbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+zsbmv_thread_L.$(SUFFIX) zsbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xsbmv_thread_U.$(SUFFIX) xsbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
+xsbmv_thread_U.$(SUFFIX) xsbmv_thread_U.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xsbmv_thread_L.$(SUFFIX) xsbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
+xsbmv_thread_L.$(SUFFIX) xsbmv_thread_L.$(PSUFFIX) : sbmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-sspmv_U.$(SUFFIX) sspmv_U.$(PSUFFIX) : spmv_k.c ../../param.h
+sspmv_U.$(SUFFIX) sspmv_U.$(PSUFFIX) : spmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-sspmv_L.$(SUFFIX) sspmv_L.$(PSUFFIX) : spmv_k.c ../../param.h
+sspmv_L.$(SUFFIX) sspmv_L.$(PSUFFIX) : spmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dspmv_U.$(SUFFIX) dspmv_U.$(PSUFFIX) : spmv_k.c ../../param.h
+dspmv_U.$(SUFFIX) dspmv_U.$(PSUFFIX) : spmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dspmv_L.$(SUFFIX) dspmv_L.$(PSUFFIX) : spmv_k.c ../../param.h
+dspmv_L.$(SUFFIX) dspmv_L.$(PSUFFIX) : spmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qspmv_U.$(SUFFIX) qspmv_U.$(PSUFFIX) : spmv_k.c ../../param.h
+qspmv_U.$(SUFFIX) qspmv_U.$(PSUFFIX) : spmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qspmv_L.$(SUFFIX) qspmv_L.$(PSUFFIX) : spmv_k.c ../../param.h
+qspmv_L.$(SUFFIX) qspmv_L.$(PSUFFIX) : spmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-cspmv_U.$(SUFFIX) cspmv_U.$(PSUFFIX) : zspmv_k.c ../../param.h
+cspmv_U.$(SUFFIX) cspmv_U.$(PSUFFIX) : zspmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-cspmv_L.$(SUFFIX) cspmv_L.$(PSUFFIX) : zspmv_k.c ../../param.h
+cspmv_L.$(SUFFIX) cspmv_L.$(PSUFFIX) : zspmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zspmv_U.$(SUFFIX) zspmv_U.$(PSUFFIX) : zspmv_k.c ../../param.h
+zspmv_U.$(SUFFIX) zspmv_U.$(PSUFFIX) : zspmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zspmv_L.$(SUFFIX) zspmv_L.$(PSUFFIX) : zspmv_k.c ../../param.h
+zspmv_L.$(SUFFIX) zspmv_L.$(PSUFFIX) : zspmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xspmv_U.$(SUFFIX) xspmv_U.$(PSUFFIX) : zspmv_k.c ../../param.h
+xspmv_U.$(SUFFIX) xspmv_U.$(PSUFFIX) : zspmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xspmv_L.$(SUFFIX) xspmv_L.$(PSUFFIX) : zspmv_k.c ../../param.h
+xspmv_L.$(SUFFIX) xspmv_L.$(PSUFFIX) : zspmv_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-sspmv_thread_U.$(SUFFIX) sspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+sspmv_thread_U.$(SUFFIX) sspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-sspmv_thread_L.$(SUFFIX) sspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+sspmv_thread_L.$(SUFFIX) sspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dspmv_thread_U.$(SUFFIX) dspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+dspmv_thread_U.$(SUFFIX) dspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dspmv_thread_L.$(SUFFIX) dspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+dspmv_thread_L.$(SUFFIX) dspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qspmv_thread_U.$(SUFFIX) qspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+qspmv_thread_U.$(SUFFIX) qspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qspmv_thread_L.$(SUFFIX) qspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+qspmv_thread_L.$(SUFFIX) qspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-cspmv_thread_U.$(SUFFIX) cspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+cspmv_thread_U.$(SUFFIX) cspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-cspmv_thread_L.$(SUFFIX) cspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+cspmv_thread_L.$(SUFFIX) cspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zspmv_thread_U.$(SUFFIX) zspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+zspmv_thread_U.$(SUFFIX) zspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zspmv_thread_L.$(SUFFIX) zspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+zspmv_thread_L.$(SUFFIX) zspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xspmv_thread_U.$(SUFFIX) xspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
+xspmv_thread_U.$(SUFFIX) xspmv_thread_U.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xspmv_thread_L.$(SUFFIX) xspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
+xspmv_thread_L.$(SUFFIX) xspmv_thread_L.$(PSUFFIX) : spmv_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-sspr_U.$(SUFFIX) sspr_U.$(PSUFFIX) : spr_k.c ../../param.h
+sspr_U.$(SUFFIX) sspr_U.$(PSUFFIX) : spr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-sspr_L.$(SUFFIX) sspr_L.$(PSUFFIX) : spr_k.c ../../param.h
+sspr_L.$(SUFFIX) sspr_L.$(PSUFFIX) : spr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dspr_U.$(SUFFIX) dspr_U.$(PSUFFIX) : spr_k.c ../../param.h
+dspr_U.$(SUFFIX) dspr_U.$(PSUFFIX) : spr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dspr_L.$(SUFFIX) dspr_L.$(PSUFFIX) : spr_k.c ../../param.h
+dspr_L.$(SUFFIX) dspr_L.$(PSUFFIX) : spr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qspr_U.$(SUFFIX) qspr_U.$(PSUFFIX) : spr_k.c ../../param.h
+qspr_U.$(SUFFIX) qspr_U.$(PSUFFIX) : spr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qspr_L.$(SUFFIX) qspr_L.$(PSUFFIX) : spr_k.c ../../param.h
+qspr_L.$(SUFFIX) qspr_L.$(PSUFFIX) : spr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-cspr_U.$(SUFFIX) cspr_U.$(PSUFFIX) : zspr_k.c ../../param.h
+cspr_U.$(SUFFIX) cspr_U.$(PSUFFIX) : zspr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-cspr_L.$(SUFFIX) cspr_L.$(PSUFFIX) : zspr_k.c ../../param.h
+cspr_L.$(SUFFIX) cspr_L.$(PSUFFIX) : zspr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zspr_U.$(SUFFIX) zspr_U.$(PSUFFIX) : zspr_k.c ../../param.h
+zspr_U.$(SUFFIX) zspr_U.$(PSUFFIX) : zspr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zspr_L.$(SUFFIX) zspr_L.$(PSUFFIX) : zspr_k.c ../../param.h
+zspr_L.$(SUFFIX) zspr_L.$(PSUFFIX) : zspr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xspr_U.$(SUFFIX) xspr_U.$(PSUFFIX) : zspr_k.c ../../param.h
+xspr_U.$(SUFFIX) xspr_U.$(PSUFFIX) : zspr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xspr_L.$(SUFFIX) xspr_L.$(PSUFFIX) : zspr_k.c ../../param.h
+xspr_L.$(SUFFIX) xspr_L.$(PSUFFIX) : zspr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-sspr_thread_U.$(SUFFIX) sspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
+sspr_thread_U.$(SUFFIX) sspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-sspr_thread_L.$(SUFFIX) sspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
+sspr_thread_L.$(SUFFIX) sspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dspr_thread_U.$(SUFFIX) dspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
+dspr_thread_U.$(SUFFIX) dspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dspr_thread_L.$(SUFFIX) dspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
+dspr_thread_L.$(SUFFIX) dspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qspr_thread_U.$(SUFFIX) qspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
+qspr_thread_U.$(SUFFIX) qspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qspr_thread_L.$(SUFFIX) qspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
+qspr_thread_L.$(SUFFIX) qspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-cspr_thread_U.$(SUFFIX) cspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
+cspr_thread_U.$(SUFFIX) cspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-cspr_thread_L.$(SUFFIX) cspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
+cspr_thread_L.$(SUFFIX) cspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zspr_thread_U.$(SUFFIX) zspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
+zspr_thread_U.$(SUFFIX) zspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zspr_thread_L.$(SUFFIX) zspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
+zspr_thread_L.$(SUFFIX) zspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xspr_thread_U.$(SUFFIX) xspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
+xspr_thread_U.$(SUFFIX) xspr_thread_U.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xspr_thread_L.$(SUFFIX) xspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
+xspr_thread_L.$(SUFFIX) xspr_thread_L.$(PSUFFIX) : spr_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-sspr2_U.$(SUFFIX) sspr2_U.$(PSUFFIX) : spr2_k.c ../../param.h
+sspr2_U.$(SUFFIX) sspr2_U.$(PSUFFIX) : spr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-sspr2_L.$(SUFFIX) sspr2_L.$(PSUFFIX) : spr2_k.c ../../param.h
+sspr2_L.$(SUFFIX) sspr2_L.$(PSUFFIX) : spr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dspr2_U.$(SUFFIX) dspr2_U.$(PSUFFIX) : spr2_k.c ../../param.h
+dspr2_U.$(SUFFIX) dspr2_U.$(PSUFFIX) : spr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dspr2_L.$(SUFFIX) dspr2_L.$(PSUFFIX) : spr2_k.c ../../param.h
+dspr2_L.$(SUFFIX) dspr2_L.$(PSUFFIX) : spr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qspr2_U.$(SUFFIX) qspr2_U.$(PSUFFIX) : spr2_k.c ../../param.h
+qspr2_U.$(SUFFIX) qspr2_U.$(PSUFFIX) : spr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qspr2_L.$(SUFFIX) qspr2_L.$(PSUFFIX) : spr2_k.c ../../param.h
+qspr2_L.$(SUFFIX) qspr2_L.$(PSUFFIX) : spr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-cspr2_U.$(SUFFIX) cspr2_U.$(PSUFFIX) : zspr2_k.c ../../param.h
+cspr2_U.$(SUFFIX) cspr2_U.$(PSUFFIX) : zspr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-cspr2_L.$(SUFFIX) cspr2_L.$(PSUFFIX) : zspr2_k.c ../../param.h
+cspr2_L.$(SUFFIX) cspr2_L.$(PSUFFIX) : zspr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zspr2_U.$(SUFFIX) zspr2_U.$(PSUFFIX) : zspr2_k.c ../../param.h
+zspr2_U.$(SUFFIX) zspr2_U.$(PSUFFIX) : zspr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zspr2_L.$(SUFFIX) zspr2_L.$(PSUFFIX) : zspr2_k.c ../../param.h
+zspr2_L.$(SUFFIX) zspr2_L.$(PSUFFIX) : zspr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xspr2_U.$(SUFFIX) xspr2_U.$(PSUFFIX) : zspr2_k.c ../../param.h
+xspr2_U.$(SUFFIX) xspr2_U.$(PSUFFIX) : zspr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xspr2_L.$(SUFFIX) xspr2_L.$(PSUFFIX) : zspr2_k.c ../../param.h
+xspr2_L.$(SUFFIX) xspr2_L.$(PSUFFIX) : zspr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-sspr2_thread_U.$(SUFFIX) sspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+sspr2_thread_U.$(SUFFIX) sspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-sspr2_thread_L.$(SUFFIX) sspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+sspr2_thread_L.$(SUFFIX) sspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dspr2_thread_U.$(SUFFIX) dspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+dspr2_thread_U.$(SUFFIX) dspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dspr2_thread_L.$(SUFFIX) dspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+dspr2_thread_L.$(SUFFIX) dspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qspr2_thread_U.$(SUFFIX) qspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+qspr2_thread_U.$(SUFFIX) qspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qspr2_thread_L.$(SUFFIX) qspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+qspr2_thread_L.$(SUFFIX) qspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-cspr2_thread_U.$(SUFFIX) cspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+cspr2_thread_U.$(SUFFIX) cspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-cspr2_thread_L.$(SUFFIX) cspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+cspr2_thread_L.$(SUFFIX) cspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zspr2_thread_U.$(SUFFIX) zspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+zspr2_thread_U.$(SUFFIX) zspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zspr2_thread_L.$(SUFFIX) zspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+zspr2_thread_L.$(SUFFIX) zspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xspr2_thread_U.$(SUFFIX) xspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
+xspr2_thread_U.$(SUFFIX) xspr2_thread_U.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xspr2_thread_L.$(SUFFIX) xspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
+xspr2_thread_L.$(SUFFIX) xspr2_thread_L.$(PSUFFIX) : spr2_thread.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-ssyr_U.$(SUFFIX) ssyr_U.$(PSUFFIX) : syr_k.c ../../param.h
+ssyr_U.$(SUFFIX) ssyr_U.$(PSUFFIX) : syr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-ssyr_L.$(SUFFIX) ssyr_L.$(PSUFFIX) : syr_k.c ../../param.h
+ssyr_L.$(SUFFIX) ssyr_L.$(PSUFFIX) : syr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dsyr_U.$(SUFFIX) dsyr_U.$(PSUFFIX) : syr_k.c ../../param.h
+dsyr_U.$(SUFFIX) dsyr_U.$(PSUFFIX) : syr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dsyr_L.$(SUFFIX) dsyr_L.$(PSUFFIX) : syr_k.c ../../param.h
+dsyr_L.$(SUFFIX) dsyr_L.$(PSUFFIX) : syr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qsyr_U.$(SUFFIX) qsyr_U.$(PSUFFIX) : syr_k.c ../../param.h
+qsyr_U.$(SUFFIX) qsyr_U.$(PSUFFIX) : syr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qsyr_L.$(SUFFIX) qsyr_L.$(PSUFFIX) : syr_k.c ../../param.h
+qsyr_L.$(SUFFIX) qsyr_L.$(PSUFFIX) : syr_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-csyr_U.$(SUFFIX) csyr_U.$(PSUFFIX) : zsyr_k.c ../../param.h
+csyr_U.$(SUFFIX) csyr_U.$(PSUFFIX) : zsyr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-csyr_L.$(SUFFIX) csyr_L.$(PSUFFIX) : zsyr_k.c ../../param.h
+csyr_L.$(SUFFIX) csyr_L.$(PSUFFIX) : zsyr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zsyr_U.$(SUFFIX) zsyr_U.$(PSUFFIX) : zsyr_k.c ../../param.h
+zsyr_U.$(SUFFIX) zsyr_U.$(PSUFFIX) : zsyr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zsyr_L.$(SUFFIX) zsyr_L.$(PSUFFIX) : zsyr_k.c ../../param.h
+zsyr_L.$(SUFFIX) zsyr_L.$(PSUFFIX) : zsyr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xsyr_U.$(SUFFIX) xsyr_U.$(PSUFFIX) : zsyr_k.c ../../param.h
+xsyr_U.$(SUFFIX) xsyr_U.$(PSUFFIX) : zsyr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xsyr_L.$(SUFFIX) xsyr_L.$(PSUFFIX) : zsyr_k.c ../../param.h
+xsyr_L.$(SUFFIX) xsyr_L.$(PSUFFIX) : zsyr_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-ssyr2_U.$(SUFFIX) ssyr2_U.$(PSUFFIX) : syr2_k.c ../../param.h
+ssyr2_U.$(SUFFIX) ssyr2_U.$(PSUFFIX) : syr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-ssyr2_L.$(SUFFIX) ssyr2_L.$(PSUFFIX) : syr2_k.c ../../param.h
+ssyr2_L.$(SUFFIX) ssyr2_L.$(PSUFFIX) : syr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-dsyr2_U.$(SUFFIX) dsyr2_U.$(PSUFFIX) : syr2_k.c ../../param.h
+dsyr2_U.$(SUFFIX) dsyr2_U.$(PSUFFIX) : syr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-dsyr2_L.$(SUFFIX) dsyr2_L.$(PSUFFIX) : syr2_k.c ../../param.h
+dsyr2_L.$(SUFFIX) dsyr2_L.$(PSUFFIX) : syr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-qsyr2_U.$(SUFFIX) qsyr2_U.$(PSUFFIX) : syr2_k.c ../../param.h
+qsyr2_U.$(SUFFIX) qsyr2_U.$(PSUFFIX) : syr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-qsyr2_L.$(SUFFIX) qsyr2_L.$(PSUFFIX) : syr2_k.c ../../param.h
+qsyr2_L.$(SUFFIX) qsyr2_L.$(PSUFFIX) : syr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-csyr2_U.$(SUFFIX) csyr2_U.$(PSUFFIX) : zsyr2_k.c ../../param.h
+csyr2_U.$(SUFFIX) csyr2_U.$(PSUFFIX) : zsyr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $(@F)
-csyr2_L.$(SUFFIX) csyr2_L.$(PSUFFIX) : zsyr2_k.c ../../param.h
+csyr2_L.$(SUFFIX) csyr2_L.$(PSUFFIX) : zsyr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $(@F)
-zsyr2_U.$(SUFFIX) zsyr2_U.$(PSUFFIX) : zsyr2_k.c ../../param.h
+zsyr2_U.$(SUFFIX) zsyr2_U.$(PSUFFIX) : zsyr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $(@F)
-zsyr2_L.$(SUFFIX) zsyr2_L.$(PSUFFIX) : zsyr2_k.c ../../param.h
+zsyr2_L.$(SUFFIX) zsyr2_L.$(PSUFFIX) : zsyr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $(@F)
-xsyr2_U.$(SUFFIX) xsyr2_U.$(PSUFFIX) : zsyr2_k.c ../../param.h
+xsyr2_U.$(SUFFIX) xsyr2_U.$(PSUFFIX) : zsyr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $(@F)
-xsyr2_L.$(SUFFIX) xsyr2_L.$(PSUFFIX) : zsyr2_k.c ../../param.h
+xsyr2_L.$(SUFFIX) xsyr2_L.$(PSUFFIX) : zsyr2_k.c ../../param.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $(@F)
-stbmv_NUU.$(SUFFIX) stbmv_NUU.$(PSUFFIX) : tbmv_U.c ../../common.h
+stbmv_NUU.$(SUFFIX) stbmv_NUU.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-stbmv_NUN.$(SUFFIX) stbmv_NUN.$(PSUFFIX) : tbmv_U.c ../../common.h
+stbmv_NUN.$(SUFFIX) stbmv_NUN.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-stbmv_TLU.$(SUFFIX) stbmv_TLU.$(PSUFFIX) : tbmv_U.c ../../common.h
+stbmv_TLU.$(SUFFIX) stbmv_TLU.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-stbmv_TLN.$(SUFFIX) stbmv_TLN.$(PSUFFIX) : tbmv_U.c ../../common.h
+stbmv_TLN.$(SUFFIX) stbmv_TLN.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-stbmv_NLU.$(SUFFIX) stbmv_NLU.$(PSUFFIX) : tbmv_L.c ../../common.h
+stbmv_NLU.$(SUFFIX) stbmv_NLU.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-stbmv_NLN.$(SUFFIX) stbmv_NLN.$(PSUFFIX) : tbmv_L.c ../../common.h
+stbmv_NLN.$(SUFFIX) stbmv_NLN.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-stbmv_TUU.$(SUFFIX) stbmv_TUU.$(PSUFFIX) : tbmv_L.c ../../common.h
+stbmv_TUU.$(SUFFIX) stbmv_TUU.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-stbmv_TUN.$(SUFFIX) stbmv_TUN.$(PSUFFIX) : tbmv_L.c ../../common.h
+stbmv_TUN.$(SUFFIX) stbmv_TUN.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtbmv_NUU.$(SUFFIX) dtbmv_NUU.$(PSUFFIX) : tbmv_U.c ../../common.h
+dtbmv_NUU.$(SUFFIX) dtbmv_NUU.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtbmv_NUN.$(SUFFIX) dtbmv_NUN.$(PSUFFIX) : tbmv_U.c ../../common.h
+dtbmv_NUN.$(SUFFIX) dtbmv_NUN.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtbmv_TLU.$(SUFFIX) dtbmv_TLU.$(PSUFFIX) : tbmv_U.c ../../common.h
+dtbmv_TLU.$(SUFFIX) dtbmv_TLU.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtbmv_TLN.$(SUFFIX) dtbmv_TLN.$(PSUFFIX) : tbmv_U.c ../../common.h
+dtbmv_TLN.$(SUFFIX) dtbmv_TLN.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtbmv_NLU.$(SUFFIX) dtbmv_NLU.$(PSUFFIX) : tbmv_L.c ../../common.h
+dtbmv_NLU.$(SUFFIX) dtbmv_NLU.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtbmv_NLN.$(SUFFIX) dtbmv_NLN.$(PSUFFIX) : tbmv_L.c ../../common.h
+dtbmv_NLN.$(SUFFIX) dtbmv_NLN.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtbmv_TUU.$(SUFFIX) dtbmv_TUU.$(PSUFFIX) : tbmv_L.c ../../common.h
+dtbmv_TUU.$(SUFFIX) dtbmv_TUU.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtbmv_TUN.$(SUFFIX) dtbmv_TUN.$(PSUFFIX) : tbmv_L.c ../../common.h
+dtbmv_TUN.$(SUFFIX) dtbmv_TUN.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtbmv_NUU.$(SUFFIX) qtbmv_NUU.$(PSUFFIX) : tbmv_U.c ../../common.h
+qtbmv_NUU.$(SUFFIX) qtbmv_NUU.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtbmv_NUN.$(SUFFIX) qtbmv_NUN.$(PSUFFIX) : tbmv_U.c ../../common.h
+qtbmv_NUN.$(SUFFIX) qtbmv_NUN.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtbmv_TLU.$(SUFFIX) qtbmv_TLU.$(PSUFFIX) : tbmv_U.c ../../common.h
+qtbmv_TLU.$(SUFFIX) qtbmv_TLU.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtbmv_TLN.$(SUFFIX) qtbmv_TLN.$(PSUFFIX) : tbmv_U.c ../../common.h
+qtbmv_TLN.$(SUFFIX) qtbmv_TLN.$(PSUFFIX) : tbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtbmv_NLU.$(SUFFIX) qtbmv_NLU.$(PSUFFIX) : tbmv_L.c ../../common.h
+qtbmv_NLU.$(SUFFIX) qtbmv_NLU.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtbmv_NLN.$(SUFFIX) qtbmv_NLN.$(PSUFFIX) : tbmv_L.c ../../common.h
+qtbmv_NLN.$(SUFFIX) qtbmv_NLN.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtbmv_TUU.$(SUFFIX) qtbmv_TUU.$(PSUFFIX) : tbmv_L.c ../../common.h
+qtbmv_TUU.$(SUFFIX) qtbmv_TUU.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtbmv_TUN.$(SUFFIX) qtbmv_TUN.$(PSUFFIX) : tbmv_L.c ../../common.h
+qtbmv_TUN.$(SUFFIX) qtbmv_TUN.$(PSUFFIX) : tbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-ctbmv_NUU.$(SUFFIX) ctbmv_NUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ctbmv_NUU.$(SUFFIX) ctbmv_NUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ctbmv_NUN.$(SUFFIX) ctbmv_NUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ctbmv_NUN.$(SUFFIX) ctbmv_NUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ctbmv_TLU.$(SUFFIX) ctbmv_TLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ctbmv_TLU.$(SUFFIX) ctbmv_TLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ctbmv_TLN.$(SUFFIX) ctbmv_TLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ctbmv_TLN.$(SUFFIX) ctbmv_TLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ctbmv_RLU.$(SUFFIX) ctbmv_RLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ctbmv_RLU.$(SUFFIX) ctbmv_RLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ctbmv_RLN.$(SUFFIX) ctbmv_RLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ctbmv_RLN.$(SUFFIX) ctbmv_RLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ctbmv_CLU.$(SUFFIX) ctbmv_CLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ctbmv_CLU.$(SUFFIX) ctbmv_CLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ctbmv_CLN.$(SUFFIX) ctbmv_CLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ctbmv_CLN.$(SUFFIX) ctbmv_CLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ctbmv_NLU.$(SUFFIX) ctbmv_NLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ctbmv_NLU.$(SUFFIX) ctbmv_NLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ctbmv_NLN.$(SUFFIX) ctbmv_NLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ctbmv_NLN.$(SUFFIX) ctbmv_NLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ctbmv_TUU.$(SUFFIX) ctbmv_TUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ctbmv_TUU.$(SUFFIX) ctbmv_TUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ctbmv_TUN.$(SUFFIX) ctbmv_TUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ctbmv_TUN.$(SUFFIX) ctbmv_TUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ctbmv_RUU.$(SUFFIX) ctbmv_RUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ctbmv_RUU.$(SUFFIX) ctbmv_RUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ctbmv_RUN.$(SUFFIX) ctbmv_RUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ctbmv_RUN.$(SUFFIX) ctbmv_RUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ctbmv_CUU.$(SUFFIX) ctbmv_CUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ctbmv_CUU.$(SUFFIX) ctbmv_CUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ctbmv_CUN.$(SUFFIX) ctbmv_CUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ctbmv_CUN.$(SUFFIX) ctbmv_CUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ztbmv_NUU.$(SUFFIX) ztbmv_NUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ztbmv_NUU.$(SUFFIX) ztbmv_NUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ztbmv_NUN.$(SUFFIX) ztbmv_NUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ztbmv_NUN.$(SUFFIX) ztbmv_NUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ztbmv_TLU.$(SUFFIX) ztbmv_TLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ztbmv_TLU.$(SUFFIX) ztbmv_TLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ztbmv_TLN.$(SUFFIX) ztbmv_TLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ztbmv_TLN.$(SUFFIX) ztbmv_TLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ztbmv_RLU.$(SUFFIX) ztbmv_RLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ztbmv_RLU.$(SUFFIX) ztbmv_RLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ztbmv_RLN.$(SUFFIX) ztbmv_RLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ztbmv_RLN.$(SUFFIX) ztbmv_RLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ztbmv_CLU.$(SUFFIX) ztbmv_CLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ztbmv_CLU.$(SUFFIX) ztbmv_CLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ztbmv_CLN.$(SUFFIX) ztbmv_CLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ztbmv_CLN.$(SUFFIX) ztbmv_CLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ztbmv_NLU.$(SUFFIX) ztbmv_NLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ztbmv_NLU.$(SUFFIX) ztbmv_NLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ztbmv_NLN.$(SUFFIX) ztbmv_NLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ztbmv_NLN.$(SUFFIX) ztbmv_NLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ztbmv_TUU.$(SUFFIX) ztbmv_TUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ztbmv_TUU.$(SUFFIX) ztbmv_TUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ztbmv_TUN.$(SUFFIX) ztbmv_TUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ztbmv_TUN.$(SUFFIX) ztbmv_TUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ztbmv_RUU.$(SUFFIX) ztbmv_RUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ztbmv_RUU.$(SUFFIX) ztbmv_RUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ztbmv_RUN.$(SUFFIX) ztbmv_RUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+ztbmv_RUN.$(SUFFIX) ztbmv_RUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ztbmv_CUU.$(SUFFIX) ztbmv_CUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ztbmv_CUU.$(SUFFIX) ztbmv_CUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ztbmv_CUN.$(SUFFIX) ztbmv_CUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+ztbmv_CUN.$(SUFFIX) ztbmv_CUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-xtbmv_NUU.$(SUFFIX) xtbmv_NUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+xtbmv_NUU.$(SUFFIX) xtbmv_NUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-xtbmv_NUN.$(SUFFIX) xtbmv_NUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+xtbmv_NUN.$(SUFFIX) xtbmv_NUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-xtbmv_TLU.$(SUFFIX) xtbmv_TLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+xtbmv_TLU.$(SUFFIX) xtbmv_TLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-xtbmv_TLN.$(SUFFIX) xtbmv_TLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+xtbmv_TLN.$(SUFFIX) xtbmv_TLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-xtbmv_RLU.$(SUFFIX) xtbmv_RLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+xtbmv_RLU.$(SUFFIX) xtbmv_RLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-xtbmv_RLN.$(SUFFIX) xtbmv_RLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+xtbmv_RLN.$(SUFFIX) xtbmv_RLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-xtbmv_CLU.$(SUFFIX) xtbmv_CLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+xtbmv_CLU.$(SUFFIX) xtbmv_CLU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-xtbmv_CLN.$(SUFFIX) xtbmv_CLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+xtbmv_CLN.$(SUFFIX) xtbmv_CLN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-xtbmv_NLU.$(SUFFIX) xtbmv_NLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+xtbmv_NLU.$(SUFFIX) xtbmv_NLU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-xtbmv_NLN.$(SUFFIX) xtbmv_NLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+xtbmv_NLN.$(SUFFIX) xtbmv_NLN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-xtbmv_TUU.$(SUFFIX) xtbmv_TUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+xtbmv_TUU.$(SUFFIX) xtbmv_TUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-xtbmv_TUN.$(SUFFIX) xtbmv_TUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+xtbmv_TUN.$(SUFFIX) xtbmv_TUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-xtbmv_RUU.$(SUFFIX) xtbmv_RUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
+xtbmv_RUU.$(SUFFIX) xtbmv_RUU.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-xtbmv_RUN.$(SUFFIX) xtbmv_RUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
+xtbmv_RUN.$(SUFFIX) xtbmv_RUN.$(PSUFFIX) : ztbmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-xtbmv_CUU.$(SUFFIX) xtbmv_CUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
+xtbmv_CUU.$(SUFFIX) xtbmv_CUU.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-xtbmv_CUN.$(SUFFIX) xtbmv_CUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
+xtbmv_CUN.$(SUFFIX) xtbmv_CUN.$(PSUFFIX) : ztbmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-stbmv_thread_NUU.$(SUFFIX) stbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+stbmv_thread_NUU.$(SUFFIX) stbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-stbmv_thread_NUN.$(SUFFIX) stbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+stbmv_thread_NUN.$(SUFFIX) stbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-stbmv_thread_TLU.$(SUFFIX) stbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+stbmv_thread_TLU.$(SUFFIX) stbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-stbmv_thread_TLN.$(SUFFIX) stbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+stbmv_thread_TLN.$(SUFFIX) stbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-stbmv_thread_NLU.$(SUFFIX) stbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+stbmv_thread_NLU.$(SUFFIX) stbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-stbmv_thread_NLN.$(SUFFIX) stbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+stbmv_thread_NLN.$(SUFFIX) stbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-stbmv_thread_TUU.$(SUFFIX) stbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+stbmv_thread_TUU.$(SUFFIX) stbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-stbmv_thread_TUN.$(SUFFIX) stbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+stbmv_thread_TUN.$(SUFFIX) stbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-dtbmv_thread_NUU.$(SUFFIX) dtbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+dtbmv_thread_NUU.$(SUFFIX) dtbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-dtbmv_thread_NUN.$(SUFFIX) dtbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+dtbmv_thread_NUN.$(SUFFIX) dtbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-dtbmv_thread_TLU.$(SUFFIX) dtbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+dtbmv_thread_TLU.$(SUFFIX) dtbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-dtbmv_thread_TLN.$(SUFFIX) dtbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+dtbmv_thread_TLN.$(SUFFIX) dtbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-dtbmv_thread_NLU.$(SUFFIX) dtbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+dtbmv_thread_NLU.$(SUFFIX) dtbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-dtbmv_thread_NLN.$(SUFFIX) dtbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+dtbmv_thread_NLN.$(SUFFIX) dtbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-dtbmv_thread_TUU.$(SUFFIX) dtbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+dtbmv_thread_TUU.$(SUFFIX) dtbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-dtbmv_thread_TUN.$(SUFFIX) dtbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+dtbmv_thread_TUN.$(SUFFIX) dtbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-qtbmv_thread_NUU.$(SUFFIX) qtbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+qtbmv_thread_NUU.$(SUFFIX) qtbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-qtbmv_thread_NUN.$(SUFFIX) qtbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+qtbmv_thread_NUN.$(SUFFIX) qtbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-qtbmv_thread_TLU.$(SUFFIX) qtbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+qtbmv_thread_TLU.$(SUFFIX) qtbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-qtbmv_thread_TLN.$(SUFFIX) qtbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+qtbmv_thread_TLN.$(SUFFIX) qtbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-qtbmv_thread_NLU.$(SUFFIX) qtbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+qtbmv_thread_NLU.$(SUFFIX) qtbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-qtbmv_thread_NLN.$(SUFFIX) qtbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+qtbmv_thread_NLN.$(SUFFIX) qtbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-qtbmv_thread_TUU.$(SUFFIX) qtbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+qtbmv_thread_TUU.$(SUFFIX) qtbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-qtbmv_thread_TUN.$(SUFFIX) qtbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+qtbmv_thread_TUN.$(SUFFIX) qtbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-ctbmv_thread_NUU.$(SUFFIX) ctbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_NUU.$(SUFFIX) ctbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ctbmv_thread_NUN.$(SUFFIX) ctbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_NUN.$(SUFFIX) ctbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ctbmv_thread_TLU.$(SUFFIX) ctbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_TLU.$(SUFFIX) ctbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ctbmv_thread_TLN.$(SUFFIX) ctbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_TLN.$(SUFFIX) ctbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ctbmv_thread_RLU.$(SUFFIX) ctbmv_thread_RLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_RLU.$(SUFFIX) ctbmv_thread_RLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ctbmv_thread_RLN.$(SUFFIX) ctbmv_thread_RLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_RLN.$(SUFFIX) ctbmv_thread_RLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ctbmv_thread_CLU.$(SUFFIX) ctbmv_thread_CLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_CLU.$(SUFFIX) ctbmv_thread_CLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ctbmv_thread_CLN.$(SUFFIX) ctbmv_thread_CLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_CLN.$(SUFFIX) ctbmv_thread_CLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ctbmv_thread_NLU.$(SUFFIX) ctbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_NLU.$(SUFFIX) ctbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ctbmv_thread_NLN.$(SUFFIX) ctbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_NLN.$(SUFFIX) ctbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ctbmv_thread_TUU.$(SUFFIX) ctbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_TUU.$(SUFFIX) ctbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ctbmv_thread_TUN.$(SUFFIX) ctbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_TUN.$(SUFFIX) ctbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ctbmv_thread_RUU.$(SUFFIX) ctbmv_thread_RUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_RUU.$(SUFFIX) ctbmv_thread_RUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ctbmv_thread_RUN.$(SUFFIX) ctbmv_thread_RUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_RUN.$(SUFFIX) ctbmv_thread_RUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ctbmv_thread_CUU.$(SUFFIX) ctbmv_thread_CUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_CUU.$(SUFFIX) ctbmv_thread_CUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ctbmv_thread_CUN.$(SUFFIX) ctbmv_thread_CUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ctbmv_thread_CUN.$(SUFFIX) ctbmv_thread_CUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ztbmv_thread_NUU.$(SUFFIX) ztbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_NUU.$(SUFFIX) ztbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ztbmv_thread_NUN.$(SUFFIX) ztbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_NUN.$(SUFFIX) ztbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ztbmv_thread_TLU.$(SUFFIX) ztbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_TLU.$(SUFFIX) ztbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ztbmv_thread_TLN.$(SUFFIX) ztbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_TLN.$(SUFFIX) ztbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ztbmv_thread_RLU.$(SUFFIX) ztbmv_thread_RLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_RLU.$(SUFFIX) ztbmv_thread_RLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ztbmv_thread_RLN.$(SUFFIX) ztbmv_thread_RLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_RLN.$(SUFFIX) ztbmv_thread_RLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ztbmv_thread_CLU.$(SUFFIX) ztbmv_thread_CLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_CLU.$(SUFFIX) ztbmv_thread_CLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ztbmv_thread_CLN.$(SUFFIX) ztbmv_thread_CLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_CLN.$(SUFFIX) ztbmv_thread_CLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ztbmv_thread_NLU.$(SUFFIX) ztbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_NLU.$(SUFFIX) ztbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ztbmv_thread_NLN.$(SUFFIX) ztbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_NLN.$(SUFFIX) ztbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ztbmv_thread_TUU.$(SUFFIX) ztbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_TUU.$(SUFFIX) ztbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ztbmv_thread_TUN.$(SUFFIX) ztbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_TUN.$(SUFFIX) ztbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ztbmv_thread_RUU.$(SUFFIX) ztbmv_thread_RUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_RUU.$(SUFFIX) ztbmv_thread_RUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ztbmv_thread_RUN.$(SUFFIX) ztbmv_thread_RUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_RUN.$(SUFFIX) ztbmv_thread_RUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ztbmv_thread_CUU.$(SUFFIX) ztbmv_thread_CUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_CUU.$(SUFFIX) ztbmv_thread_CUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ztbmv_thread_CUN.$(SUFFIX) ztbmv_thread_CUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+ztbmv_thread_CUN.$(SUFFIX) ztbmv_thread_CUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-xtbmv_thread_NUU.$(SUFFIX) xtbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_NUU.$(SUFFIX) xtbmv_thread_NUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-xtbmv_thread_NUN.$(SUFFIX) xtbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_NUN.$(SUFFIX) xtbmv_thread_NUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-xtbmv_thread_TLU.$(SUFFIX) xtbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_TLU.$(SUFFIX) xtbmv_thread_TLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-xtbmv_thread_TLN.$(SUFFIX) xtbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_TLN.$(SUFFIX) xtbmv_thread_TLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-xtbmv_thread_RLN.$(SUFFIX) xtbmv_thread_RLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_RLN.$(SUFFIX) xtbmv_thread_RLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-xtbmv_thread_CLN.$(SUFFIX) xtbmv_thread_CLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_CLN.$(SUFFIX) xtbmv_thread_CLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-xtbmv_thread_NLU.$(SUFFIX) xtbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_NLU.$(SUFFIX) xtbmv_thread_NLU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-xtbmv_thread_NLN.$(SUFFIX) xtbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_NLN.$(SUFFIX) xtbmv_thread_NLN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-xtbmv_thread_TUU.$(SUFFIX) xtbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_TUU.$(SUFFIX) xtbmv_thread_TUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-xtbmv_thread_TUN.$(SUFFIX) xtbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_TUN.$(SUFFIX) xtbmv_thread_TUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-xtbmv_thread_RUN.$(SUFFIX) xtbmv_thread_RUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_RUN.$(SUFFIX) xtbmv_thread_RUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUU.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-xtbmv_thread_CUN.$(SUFFIX) xtbmv_thread_CUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
+xtbmv_thread_CUN.$(SUFFIX) xtbmv_thread_CUN.$(PSUFFIX) : tbmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-stbsv_NUU.$(SUFFIX) stbsv_NUU.$(PSUFFIX) : tbsv_U.c ../../common.h
+stbsv_NUU.$(SUFFIX) stbsv_NUU.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-stbsv_NUN.$(SUFFIX) stbsv_NUN.$(PSUFFIX) : tbsv_U.c ../../common.h
+stbsv_NUN.$(SUFFIX) stbsv_NUN.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-stbsv_TLU.$(SUFFIX) stbsv_TLU.$(PSUFFIX) : tbsv_U.c ../../common.h
+stbsv_TLU.$(SUFFIX) stbsv_TLU.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-stbsv_TLN.$(SUFFIX) stbsv_TLN.$(PSUFFIX) : tbsv_U.c ../../common.h
+stbsv_TLN.$(SUFFIX) stbsv_TLN.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-stbsv_NLU.$(SUFFIX) stbsv_NLU.$(PSUFFIX) : tbsv_L.c ../../common.h
+stbsv_NLU.$(SUFFIX) stbsv_NLU.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-stbsv_NLN.$(SUFFIX) stbsv_NLN.$(PSUFFIX) : tbsv_L.c ../../common.h
+stbsv_NLN.$(SUFFIX) stbsv_NLN.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-stbsv_TUU.$(SUFFIX) stbsv_TUU.$(PSUFFIX) : tbsv_L.c ../../common.h
+stbsv_TUU.$(SUFFIX) stbsv_TUU.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-stbsv_TUN.$(SUFFIX) stbsv_TUN.$(PSUFFIX) : tbsv_L.c ../../common.h
+stbsv_TUN.$(SUFFIX) stbsv_TUN.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtbsv_NUU.$(SUFFIX) dtbsv_NUU.$(PSUFFIX) : tbsv_U.c ../../common.h
+dtbsv_NUU.$(SUFFIX) dtbsv_NUU.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtbsv_NUN.$(SUFFIX) dtbsv_NUN.$(PSUFFIX) : tbsv_U.c ../../common.h
+dtbsv_NUN.$(SUFFIX) dtbsv_NUN.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtbsv_TLU.$(SUFFIX) dtbsv_TLU.$(PSUFFIX) : tbsv_U.c ../../common.h
+dtbsv_TLU.$(SUFFIX) dtbsv_TLU.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtbsv_TLN.$(SUFFIX) dtbsv_TLN.$(PSUFFIX) : tbsv_U.c ../../common.h
+dtbsv_TLN.$(SUFFIX) dtbsv_TLN.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtbsv_NLU.$(SUFFIX) dtbsv_NLU.$(PSUFFIX) : tbsv_L.c ../../common.h
+dtbsv_NLU.$(SUFFIX) dtbsv_NLU.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtbsv_NLN.$(SUFFIX) dtbsv_NLN.$(PSUFFIX) : tbsv_L.c ../../common.h
+dtbsv_NLN.$(SUFFIX) dtbsv_NLN.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtbsv_TUU.$(SUFFIX) dtbsv_TUU.$(PSUFFIX) : tbsv_L.c ../../common.h
+dtbsv_TUU.$(SUFFIX) dtbsv_TUU.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtbsv_TUN.$(SUFFIX) dtbsv_TUN.$(PSUFFIX) : tbsv_L.c ../../common.h
+dtbsv_TUN.$(SUFFIX) dtbsv_TUN.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtbsv_NUU.$(SUFFIX) qtbsv_NUU.$(PSUFFIX) : tbsv_U.c ../../common.h
+qtbsv_NUU.$(SUFFIX) qtbsv_NUU.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtbsv_NUN.$(SUFFIX) qtbsv_NUN.$(PSUFFIX) : tbsv_U.c ../../common.h
+qtbsv_NUN.$(SUFFIX) qtbsv_NUN.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtbsv_TLU.$(SUFFIX) qtbsv_TLU.$(PSUFFIX) : tbsv_U.c ../../common.h
+qtbsv_TLU.$(SUFFIX) qtbsv_TLU.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtbsv_TLN.$(SUFFIX) qtbsv_TLN.$(PSUFFIX) : tbsv_U.c ../../common.h
+qtbsv_TLN.$(SUFFIX) qtbsv_TLN.$(PSUFFIX) : tbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtbsv_NLU.$(SUFFIX) qtbsv_NLU.$(PSUFFIX) : tbsv_L.c ../../common.h
+qtbsv_NLU.$(SUFFIX) qtbsv_NLU.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtbsv_NLN.$(SUFFIX) qtbsv_NLN.$(PSUFFIX) : tbsv_L.c ../../common.h
+qtbsv_NLN.$(SUFFIX) qtbsv_NLN.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtbsv_TUU.$(SUFFIX) qtbsv_TUU.$(PSUFFIX) : tbsv_L.c ../../common.h
+qtbsv_TUU.$(SUFFIX) qtbsv_TUU.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtbsv_TUN.$(SUFFIX) qtbsv_TUN.$(PSUFFIX) : tbsv_L.c ../../common.h
+qtbsv_TUN.$(SUFFIX) qtbsv_TUN.$(PSUFFIX) : tbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-ctbsv_NUU.$(SUFFIX) ctbsv_NUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ctbsv_NUU.$(SUFFIX) ctbsv_NUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ctbsv_NUN.$(SUFFIX) ctbsv_NUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ctbsv_NUN.$(SUFFIX) ctbsv_NUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ctbsv_TLU.$(SUFFIX) ctbsv_TLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ctbsv_TLU.$(SUFFIX) ctbsv_TLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ctbsv_TLN.$(SUFFIX) ctbsv_TLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ctbsv_TLN.$(SUFFIX) ctbsv_TLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ctbsv_RLU.$(SUFFIX) ctbsv_RLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ctbsv_RLU.$(SUFFIX) ctbsv_RLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ctbsv_RLN.$(SUFFIX) ctbsv_RLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ctbsv_RLN.$(SUFFIX) ctbsv_RLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ctbsv_CLU.$(SUFFIX) ctbsv_CLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ctbsv_CLU.$(SUFFIX) ctbsv_CLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ctbsv_CLN.$(SUFFIX) ctbsv_CLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ctbsv_CLN.$(SUFFIX) ctbsv_CLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ctbsv_NLU.$(SUFFIX) ctbsv_NLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ctbsv_NLU.$(SUFFIX) ctbsv_NLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ctbsv_NLN.$(SUFFIX) ctbsv_NLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ctbsv_NLN.$(SUFFIX) ctbsv_NLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ctbsv_TUU.$(SUFFIX) ctbsv_TUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ctbsv_TUU.$(SUFFIX) ctbsv_TUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ctbsv_TUN.$(SUFFIX) ctbsv_TUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ctbsv_TUN.$(SUFFIX) ctbsv_TUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ctbsv_RUU.$(SUFFIX) ctbsv_RUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ctbsv_RUU.$(SUFFIX) ctbsv_RUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ctbsv_RUN.$(SUFFIX) ctbsv_RUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ctbsv_RUN.$(SUFFIX) ctbsv_RUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ctbsv_CUU.$(SUFFIX) ctbsv_CUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ctbsv_CUU.$(SUFFIX) ctbsv_CUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ctbsv_CUN.$(SUFFIX) ctbsv_CUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ctbsv_CUN.$(SUFFIX) ctbsv_CUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ztbsv_NUU.$(SUFFIX) ztbsv_NUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ztbsv_NUU.$(SUFFIX) ztbsv_NUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ztbsv_NUN.$(SUFFIX) ztbsv_NUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ztbsv_NUN.$(SUFFIX) ztbsv_NUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ztbsv_TLU.$(SUFFIX) ztbsv_TLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ztbsv_TLU.$(SUFFIX) ztbsv_TLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ztbsv_TLN.$(SUFFIX) ztbsv_TLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ztbsv_TLN.$(SUFFIX) ztbsv_TLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ztbsv_RLU.$(SUFFIX) ztbsv_RLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ztbsv_RLU.$(SUFFIX) ztbsv_RLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ztbsv_RLN.$(SUFFIX) ztbsv_RLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ztbsv_RLN.$(SUFFIX) ztbsv_RLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ztbsv_CLU.$(SUFFIX) ztbsv_CLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ztbsv_CLU.$(SUFFIX) ztbsv_CLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ztbsv_CLN.$(SUFFIX) ztbsv_CLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ztbsv_CLN.$(SUFFIX) ztbsv_CLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ztbsv_NLU.$(SUFFIX) ztbsv_NLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ztbsv_NLU.$(SUFFIX) ztbsv_NLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ztbsv_NLN.$(SUFFIX) ztbsv_NLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ztbsv_NLN.$(SUFFIX) ztbsv_NLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ztbsv_TUU.$(SUFFIX) ztbsv_TUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ztbsv_TUU.$(SUFFIX) ztbsv_TUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ztbsv_TUN.$(SUFFIX) ztbsv_TUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ztbsv_TUN.$(SUFFIX) ztbsv_TUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ztbsv_RUU.$(SUFFIX) ztbsv_RUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ztbsv_RUU.$(SUFFIX) ztbsv_RUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ztbsv_RUN.$(SUFFIX) ztbsv_RUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+ztbsv_RUN.$(SUFFIX) ztbsv_RUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ztbsv_CUU.$(SUFFIX) ztbsv_CUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ztbsv_CUU.$(SUFFIX) ztbsv_CUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ztbsv_CUN.$(SUFFIX) ztbsv_CUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+ztbsv_CUN.$(SUFFIX) ztbsv_CUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-xtbsv_NUU.$(SUFFIX) xtbsv_NUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+xtbsv_NUU.$(SUFFIX) xtbsv_NUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-xtbsv_NUN.$(SUFFIX) xtbsv_NUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+xtbsv_NUN.$(SUFFIX) xtbsv_NUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-xtbsv_TLU.$(SUFFIX) xtbsv_TLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+xtbsv_TLU.$(SUFFIX) xtbsv_TLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-xtbsv_TLN.$(SUFFIX) xtbsv_TLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+xtbsv_TLN.$(SUFFIX) xtbsv_TLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-xtbsv_RLU.$(SUFFIX) xtbsv_RLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+xtbsv_RLU.$(SUFFIX) xtbsv_RLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-xtbsv_RLN.$(SUFFIX) xtbsv_RLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+xtbsv_RLN.$(SUFFIX) xtbsv_RLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-xtbsv_CLU.$(SUFFIX) xtbsv_CLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+xtbsv_CLU.$(SUFFIX) xtbsv_CLU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-xtbsv_CLN.$(SUFFIX) xtbsv_CLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+xtbsv_CLN.$(SUFFIX) xtbsv_CLN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-xtbsv_NLU.$(SUFFIX) xtbsv_NLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+xtbsv_NLU.$(SUFFIX) xtbsv_NLU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-xtbsv_NLN.$(SUFFIX) xtbsv_NLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+xtbsv_NLN.$(SUFFIX) xtbsv_NLN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-xtbsv_TUU.$(SUFFIX) xtbsv_TUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+xtbsv_TUU.$(SUFFIX) xtbsv_TUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-xtbsv_TUN.$(SUFFIX) xtbsv_TUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+xtbsv_TUN.$(SUFFIX) xtbsv_TUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-xtbsv_RUU.$(SUFFIX) xtbsv_RUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
+xtbsv_RUU.$(SUFFIX) xtbsv_RUU.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-xtbsv_RUN.$(SUFFIX) xtbsv_RUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
+xtbsv_RUN.$(SUFFIX) xtbsv_RUN.$(PSUFFIX) : ztbsv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-xtbsv_CUU.$(SUFFIX) xtbsv_CUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
+xtbsv_CUU.$(SUFFIX) xtbsv_CUU.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-xtbsv_CUN.$(SUFFIX) xtbsv_CUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
+xtbsv_CUN.$(SUFFIX) xtbsv_CUN.$(PSUFFIX) : ztbsv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-stpmv_NUU.$(SUFFIX) stpmv_NUU.$(PSUFFIX) : tpmv_U.c ../../common.h
+stpmv_NUU.$(SUFFIX) stpmv_NUU.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-stpmv_NUN.$(SUFFIX) stpmv_NUN.$(PSUFFIX) : tpmv_U.c ../../common.h
+stpmv_NUN.$(SUFFIX) stpmv_NUN.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-stpmv_TLU.$(SUFFIX) stpmv_TLU.$(PSUFFIX) : tpmv_U.c ../../common.h
+stpmv_TLU.$(SUFFIX) stpmv_TLU.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-stpmv_TLN.$(SUFFIX) stpmv_TLN.$(PSUFFIX) : tpmv_U.c ../../common.h
+stpmv_TLN.$(SUFFIX) stpmv_TLN.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-stpmv_NLU.$(SUFFIX) stpmv_NLU.$(PSUFFIX) : tpmv_L.c ../../common.h
+stpmv_NLU.$(SUFFIX) stpmv_NLU.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-stpmv_NLN.$(SUFFIX) stpmv_NLN.$(PSUFFIX) : tpmv_L.c ../../common.h
+stpmv_NLN.$(SUFFIX) stpmv_NLN.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-stpmv_TUU.$(SUFFIX) stpmv_TUU.$(PSUFFIX) : tpmv_L.c ../../common.h
+stpmv_TUU.$(SUFFIX) stpmv_TUU.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-stpmv_TUN.$(SUFFIX) stpmv_TUN.$(PSUFFIX) : tpmv_L.c ../../common.h
+stpmv_TUN.$(SUFFIX) stpmv_TUN.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtpmv_NUU.$(SUFFIX) dtpmv_NUU.$(PSUFFIX) : tpmv_U.c ../../common.h
+dtpmv_NUU.$(SUFFIX) dtpmv_NUU.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtpmv_NUN.$(SUFFIX) dtpmv_NUN.$(PSUFFIX) : tpmv_U.c ../../common.h
+dtpmv_NUN.$(SUFFIX) dtpmv_NUN.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtpmv_TLU.$(SUFFIX) dtpmv_TLU.$(PSUFFIX) : tpmv_U.c ../../common.h
+dtpmv_TLU.$(SUFFIX) dtpmv_TLU.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtpmv_TLN.$(SUFFIX) dtpmv_TLN.$(PSUFFIX) : tpmv_U.c ../../common.h
+dtpmv_TLN.$(SUFFIX) dtpmv_TLN.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtpmv_NLU.$(SUFFIX) dtpmv_NLU.$(PSUFFIX) : tpmv_L.c ../../common.h
+dtpmv_NLU.$(SUFFIX) dtpmv_NLU.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtpmv_NLN.$(SUFFIX) dtpmv_NLN.$(PSUFFIX) : tpmv_L.c ../../common.h
+dtpmv_NLN.$(SUFFIX) dtpmv_NLN.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtpmv_TUU.$(SUFFIX) dtpmv_TUU.$(PSUFFIX) : tpmv_L.c ../../common.h
+dtpmv_TUU.$(SUFFIX) dtpmv_TUU.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtpmv_TUN.$(SUFFIX) dtpmv_TUN.$(PSUFFIX) : tpmv_L.c ../../common.h
+dtpmv_TUN.$(SUFFIX) dtpmv_TUN.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtpmv_NUU.$(SUFFIX) qtpmv_NUU.$(PSUFFIX) : tpmv_U.c ../../common.h
+qtpmv_NUU.$(SUFFIX) qtpmv_NUU.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtpmv_NUN.$(SUFFIX) qtpmv_NUN.$(PSUFFIX) : tpmv_U.c ../../common.h
+qtpmv_NUN.$(SUFFIX) qtpmv_NUN.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtpmv_TLU.$(SUFFIX) qtpmv_TLU.$(PSUFFIX) : tpmv_U.c ../../common.h
+qtpmv_TLU.$(SUFFIX) qtpmv_TLU.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtpmv_TLN.$(SUFFIX) qtpmv_TLN.$(PSUFFIX) : tpmv_U.c ../../common.h
+qtpmv_TLN.$(SUFFIX) qtpmv_TLN.$(PSUFFIX) : tpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtpmv_NLU.$(SUFFIX) qtpmv_NLU.$(PSUFFIX) : tpmv_L.c ../../common.h
+qtpmv_NLU.$(SUFFIX) qtpmv_NLU.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtpmv_NLN.$(SUFFIX) qtpmv_NLN.$(PSUFFIX) : tpmv_L.c ../../common.h
+qtpmv_NLN.$(SUFFIX) qtpmv_NLN.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtpmv_TUU.$(SUFFIX) qtpmv_TUU.$(PSUFFIX) : tpmv_L.c ../../common.h
+qtpmv_TUU.$(SUFFIX) qtpmv_TUU.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtpmv_TUN.$(SUFFIX) qtpmv_TUN.$(PSUFFIX) : tpmv_L.c ../../common.h
+qtpmv_TUN.$(SUFFIX) qtpmv_TUN.$(PSUFFIX) : tpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-ctpmv_NUU.$(SUFFIX) ctpmv_NUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ctpmv_NUU.$(SUFFIX) ctpmv_NUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ctpmv_NUN.$(SUFFIX) ctpmv_NUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ctpmv_NUN.$(SUFFIX) ctpmv_NUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ctpmv_TLU.$(SUFFIX) ctpmv_TLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ctpmv_TLU.$(SUFFIX) ctpmv_TLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ctpmv_TLN.$(SUFFIX) ctpmv_TLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ctpmv_TLN.$(SUFFIX) ctpmv_TLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ctpmv_RLU.$(SUFFIX) ctpmv_RLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ctpmv_RLU.$(SUFFIX) ctpmv_RLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ctpmv_RLN.$(SUFFIX) ctpmv_RLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ctpmv_RLN.$(SUFFIX) ctpmv_RLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ctpmv_CLU.$(SUFFIX) ctpmv_CLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ctpmv_CLU.$(SUFFIX) ctpmv_CLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ctpmv_CLN.$(SUFFIX) ctpmv_CLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ctpmv_CLN.$(SUFFIX) ctpmv_CLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ctpmv_NLU.$(SUFFIX) ctpmv_NLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ctpmv_NLU.$(SUFFIX) ctpmv_NLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ctpmv_NLN.$(SUFFIX) ctpmv_NLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ctpmv_NLN.$(SUFFIX) ctpmv_NLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ctpmv_TUU.$(SUFFIX) ctpmv_TUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ctpmv_TUU.$(SUFFIX) ctpmv_TUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ctpmv_TUN.$(SUFFIX) ctpmv_TUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ctpmv_TUN.$(SUFFIX) ctpmv_TUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ctpmv_RUU.$(SUFFIX) ctpmv_RUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ctpmv_RUU.$(SUFFIX) ctpmv_RUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ctpmv_RUN.$(SUFFIX) ctpmv_RUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ctpmv_RUN.$(SUFFIX) ctpmv_RUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ctpmv_CUU.$(SUFFIX) ctpmv_CUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ctpmv_CUU.$(SUFFIX) ctpmv_CUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ctpmv_CUN.$(SUFFIX) ctpmv_CUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ctpmv_CUN.$(SUFFIX) ctpmv_CUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ztpmv_NUU.$(SUFFIX) ztpmv_NUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ztpmv_NUU.$(SUFFIX) ztpmv_NUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ztpmv_NUN.$(SUFFIX) ztpmv_NUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ztpmv_NUN.$(SUFFIX) ztpmv_NUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ztpmv_TLU.$(SUFFIX) ztpmv_TLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ztpmv_TLU.$(SUFFIX) ztpmv_TLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ztpmv_TLN.$(SUFFIX) ztpmv_TLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ztpmv_TLN.$(SUFFIX) ztpmv_TLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ztpmv_RLU.$(SUFFIX) ztpmv_RLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ztpmv_RLU.$(SUFFIX) ztpmv_RLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ztpmv_RLN.$(SUFFIX) ztpmv_RLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ztpmv_RLN.$(SUFFIX) ztpmv_RLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ztpmv_CLU.$(SUFFIX) ztpmv_CLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ztpmv_CLU.$(SUFFIX) ztpmv_CLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ztpmv_CLN.$(SUFFIX) ztpmv_CLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ztpmv_CLN.$(SUFFIX) ztpmv_CLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ztpmv_NLU.$(SUFFIX) ztpmv_NLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ztpmv_NLU.$(SUFFIX) ztpmv_NLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ztpmv_NLN.$(SUFFIX) ztpmv_NLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ztpmv_NLN.$(SUFFIX) ztpmv_NLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ztpmv_TUU.$(SUFFIX) ztpmv_TUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ztpmv_TUU.$(SUFFIX) ztpmv_TUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ztpmv_TUN.$(SUFFIX) ztpmv_TUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ztpmv_TUN.$(SUFFIX) ztpmv_TUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ztpmv_RUU.$(SUFFIX) ztpmv_RUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ztpmv_RUU.$(SUFFIX) ztpmv_RUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ztpmv_RUN.$(SUFFIX) ztpmv_RUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+ztpmv_RUN.$(SUFFIX) ztpmv_RUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ztpmv_CUU.$(SUFFIX) ztpmv_CUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ztpmv_CUU.$(SUFFIX) ztpmv_CUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ztpmv_CUN.$(SUFFIX) ztpmv_CUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+ztpmv_CUN.$(SUFFIX) ztpmv_CUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-xtpmv_NUU.$(SUFFIX) xtpmv_NUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+xtpmv_NUU.$(SUFFIX) xtpmv_NUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-xtpmv_NUN.$(SUFFIX) xtpmv_NUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+xtpmv_NUN.$(SUFFIX) xtpmv_NUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-xtpmv_TLU.$(SUFFIX) xtpmv_TLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+xtpmv_TLU.$(SUFFIX) xtpmv_TLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-xtpmv_TLN.$(SUFFIX) xtpmv_TLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+xtpmv_TLN.$(SUFFIX) xtpmv_TLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-xtpmv_RLU.$(SUFFIX) xtpmv_RLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+xtpmv_RLU.$(SUFFIX) xtpmv_RLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-xtpmv_RLN.$(SUFFIX) xtpmv_RLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+xtpmv_RLN.$(SUFFIX) xtpmv_RLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-xtpmv_CLU.$(SUFFIX) xtpmv_CLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+xtpmv_CLU.$(SUFFIX) xtpmv_CLU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-xtpmv_CLN.$(SUFFIX) xtpmv_CLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+xtpmv_CLN.$(SUFFIX) xtpmv_CLN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-xtpmv_NLU.$(SUFFIX) xtpmv_NLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+xtpmv_NLU.$(SUFFIX) xtpmv_NLU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-xtpmv_NLN.$(SUFFIX) xtpmv_NLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+xtpmv_NLN.$(SUFFIX) xtpmv_NLN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-xtpmv_TUU.$(SUFFIX) xtpmv_TUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+xtpmv_TUU.$(SUFFIX) xtpmv_TUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-xtpmv_TUN.$(SUFFIX) xtpmv_TUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+xtpmv_TUN.$(SUFFIX) xtpmv_TUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-xtpmv_RUU.$(SUFFIX) xtpmv_RUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
+xtpmv_RUU.$(SUFFIX) xtpmv_RUU.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-xtpmv_RUN.$(SUFFIX) xtpmv_RUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
+xtpmv_RUN.$(SUFFIX) xtpmv_RUN.$(PSUFFIX) : ztpmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-xtpmv_CUU.$(SUFFIX) xtpmv_CUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
+xtpmv_CUU.$(SUFFIX) xtpmv_CUU.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-xtpmv_CUN.$(SUFFIX) xtpmv_CUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
+xtpmv_CUN.$(SUFFIX) xtpmv_CUN.$(PSUFFIX) : ztpmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-stpmv_thread_NUU.$(SUFFIX) stpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+stpmv_thread_NUU.$(SUFFIX) stpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-stpmv_thread_NUN.$(SUFFIX) stpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+stpmv_thread_NUN.$(SUFFIX) stpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-stpmv_thread_TLU.$(SUFFIX) stpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+stpmv_thread_TLU.$(SUFFIX) stpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-stpmv_thread_TLN.$(SUFFIX) stpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+stpmv_thread_TLN.$(SUFFIX) stpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-stpmv_thread_NLU.$(SUFFIX) stpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+stpmv_thread_NLU.$(SUFFIX) stpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-stpmv_thread_NLN.$(SUFFIX) stpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+stpmv_thread_NLN.$(SUFFIX) stpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-stpmv_thread_TUU.$(SUFFIX) stpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+stpmv_thread_TUU.$(SUFFIX) stpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-stpmv_thread_TUN.$(SUFFIX) stpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+stpmv_thread_TUN.$(SUFFIX) stpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-dtpmv_thread_NUU.$(SUFFIX) dtpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+dtpmv_thread_NUU.$(SUFFIX) dtpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-dtpmv_thread_NUN.$(SUFFIX) dtpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+dtpmv_thread_NUN.$(SUFFIX) dtpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-dtpmv_thread_TLU.$(SUFFIX) dtpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+dtpmv_thread_TLU.$(SUFFIX) dtpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-dtpmv_thread_TLN.$(SUFFIX) dtpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+dtpmv_thread_TLN.$(SUFFIX) dtpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-dtpmv_thread_NLU.$(SUFFIX) dtpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+dtpmv_thread_NLU.$(SUFFIX) dtpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-dtpmv_thread_NLN.$(SUFFIX) dtpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+dtpmv_thread_NLN.$(SUFFIX) dtpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-dtpmv_thread_TUU.$(SUFFIX) dtpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+dtpmv_thread_TUU.$(SUFFIX) dtpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-dtpmv_thread_TUN.$(SUFFIX) dtpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+dtpmv_thread_TUN.$(SUFFIX) dtpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-qtpmv_thread_NUU.$(SUFFIX) qtpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+qtpmv_thread_NUU.$(SUFFIX) qtpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-qtpmv_thread_NUN.$(SUFFIX) qtpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+qtpmv_thread_NUN.$(SUFFIX) qtpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-qtpmv_thread_TLU.$(SUFFIX) qtpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+qtpmv_thread_TLU.$(SUFFIX) qtpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-qtpmv_thread_TLN.$(SUFFIX) qtpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+qtpmv_thread_TLN.$(SUFFIX) qtpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-qtpmv_thread_NLU.$(SUFFIX) qtpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+qtpmv_thread_NLU.$(SUFFIX) qtpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-qtpmv_thread_NLN.$(SUFFIX) qtpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+qtpmv_thread_NLN.$(SUFFIX) qtpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-qtpmv_thread_TUU.$(SUFFIX) qtpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+qtpmv_thread_TUU.$(SUFFIX) qtpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-qtpmv_thread_TUN.$(SUFFIX) qtpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+qtpmv_thread_TUN.$(SUFFIX) qtpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-ctpmv_thread_NUU.$(SUFFIX) ctpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_NUU.$(SUFFIX) ctpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ctpmv_thread_NUN.$(SUFFIX) ctpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_NUN.$(SUFFIX) ctpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ctpmv_thread_TLU.$(SUFFIX) ctpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_TLU.$(SUFFIX) ctpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ctpmv_thread_TLN.$(SUFFIX) ctpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_TLN.$(SUFFIX) ctpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ctpmv_thread_RLU.$(SUFFIX) ctpmv_thread_RLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_RLU.$(SUFFIX) ctpmv_thread_RLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ctpmv_thread_RLN.$(SUFFIX) ctpmv_thread_RLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_RLN.$(SUFFIX) ctpmv_thread_RLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ctpmv_thread_CLU.$(SUFFIX) ctpmv_thread_CLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_CLU.$(SUFFIX) ctpmv_thread_CLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ctpmv_thread_CLN.$(SUFFIX) ctpmv_thread_CLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_CLN.$(SUFFIX) ctpmv_thread_CLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ctpmv_thread_NLU.$(SUFFIX) ctpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_NLU.$(SUFFIX) ctpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ctpmv_thread_NLN.$(SUFFIX) ctpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_NLN.$(SUFFIX) ctpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ctpmv_thread_TUU.$(SUFFIX) ctpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_TUU.$(SUFFIX) ctpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ctpmv_thread_TUN.$(SUFFIX) ctpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_TUN.$(SUFFIX) ctpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ctpmv_thread_RUU.$(SUFFIX) ctpmv_thread_RUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_RUU.$(SUFFIX) ctpmv_thread_RUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ctpmv_thread_RUN.$(SUFFIX) ctpmv_thread_RUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_RUN.$(SUFFIX) ctpmv_thread_RUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ctpmv_thread_CUU.$(SUFFIX) ctpmv_thread_CUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_CUU.$(SUFFIX) ctpmv_thread_CUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ctpmv_thread_CUN.$(SUFFIX) ctpmv_thread_CUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ctpmv_thread_CUN.$(SUFFIX) ctpmv_thread_CUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ztpmv_thread_NUU.$(SUFFIX) ztpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_NUU.$(SUFFIX) ztpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ztpmv_thread_NUN.$(SUFFIX) ztpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_NUN.$(SUFFIX) ztpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ztpmv_thread_TLU.$(SUFFIX) ztpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_TLU.$(SUFFIX) ztpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ztpmv_thread_TLN.$(SUFFIX) ztpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_TLN.$(SUFFIX) ztpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ztpmv_thread_RLU.$(SUFFIX) ztpmv_thread_RLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_RLU.$(SUFFIX) ztpmv_thread_RLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ztpmv_thread_RLN.$(SUFFIX) ztpmv_thread_RLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_RLN.$(SUFFIX) ztpmv_thread_RLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ztpmv_thread_CLU.$(SUFFIX) ztpmv_thread_CLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_CLU.$(SUFFIX) ztpmv_thread_CLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ztpmv_thread_CLN.$(SUFFIX) ztpmv_thread_CLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_CLN.$(SUFFIX) ztpmv_thread_CLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ztpmv_thread_NLU.$(SUFFIX) ztpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_NLU.$(SUFFIX) ztpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ztpmv_thread_NLN.$(SUFFIX) ztpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_NLN.$(SUFFIX) ztpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ztpmv_thread_TUU.$(SUFFIX) ztpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_TUU.$(SUFFIX) ztpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ztpmv_thread_TUN.$(SUFFIX) ztpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_TUN.$(SUFFIX) ztpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ztpmv_thread_RUU.$(SUFFIX) ztpmv_thread_RUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_RUU.$(SUFFIX) ztpmv_thread_RUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ztpmv_thread_RUN.$(SUFFIX) ztpmv_thread_RUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_RUN.$(SUFFIX) ztpmv_thread_RUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ztpmv_thread_CUU.$(SUFFIX) ztpmv_thread_CUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_CUU.$(SUFFIX) ztpmv_thread_CUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ztpmv_thread_CUN.$(SUFFIX) ztpmv_thread_CUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+ztpmv_thread_CUN.$(SUFFIX) ztpmv_thread_CUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-xtpmv_thread_NUU.$(SUFFIX) xtpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_NUU.$(SUFFIX) xtpmv_thread_NUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-xtpmv_thread_NUN.$(SUFFIX) xtpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_NUN.$(SUFFIX) xtpmv_thread_NUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-xtpmv_thread_TLU.$(SUFFIX) xtpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_TLU.$(SUFFIX) xtpmv_thread_TLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-xtpmv_thread_TLN.$(SUFFIX) xtpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_TLN.$(SUFFIX) xtpmv_thread_TLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-xtpmv_thread_RLU.$(SUFFIX) xtpmv_thread_RLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_RLU.$(SUFFIX) xtpmv_thread_RLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-xtpmv_thread_RLN.$(SUFFIX) xtpmv_thread_RLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_RLN.$(SUFFIX) xtpmv_thread_RLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-xtpmv_thread_CLU.$(SUFFIX) xtpmv_thread_CLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_CLU.$(SUFFIX) xtpmv_thread_CLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-xtpmv_thread_CLN.$(SUFFIX) xtpmv_thread_CLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_CLN.$(SUFFIX) xtpmv_thread_CLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-xtpmv_thread_NLU.$(SUFFIX) xtpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_NLU.$(SUFFIX) xtpmv_thread_NLU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-xtpmv_thread_NLN.$(SUFFIX) xtpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_NLN.$(SUFFIX) xtpmv_thread_NLN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-xtpmv_thread_TUU.$(SUFFIX) xtpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_TUU.$(SUFFIX) xtpmv_thread_TUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-xtpmv_thread_TUN.$(SUFFIX) xtpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_TUN.$(SUFFIX) xtpmv_thread_TUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-xtpmv_thread_RUU.$(SUFFIX) xtpmv_thread_RUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_RUU.$(SUFFIX) xtpmv_thread_RUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-xtpmv_thread_RUN.$(SUFFIX) xtpmv_thread_RUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_RUN.$(SUFFIX) xtpmv_thread_RUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-xtpmv_thread_CUU.$(SUFFIX) xtpmv_thread_CUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_CUU.$(SUFFIX) xtpmv_thread_CUU.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-xtpmv_thread_CUN.$(SUFFIX) xtpmv_thread_CUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
+xtpmv_thread_CUN.$(SUFFIX) xtpmv_thread_CUN.$(PSUFFIX) : tpmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-stpsv_NUU.$(SUFFIX) stpsv_NUU.$(PSUFFIX) : tpsv_U.c ../../param.h
+stpsv_NUU.$(SUFFIX) stpsv_NUU.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-stpsv_NUN.$(SUFFIX) stpsv_NUN.$(PSUFFIX) : tpsv_U.c ../../param.h
+stpsv_NUN.$(SUFFIX) stpsv_NUN.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-stpsv_TLU.$(SUFFIX) stpsv_TLU.$(PSUFFIX) : tpsv_U.c ../../param.h
+stpsv_TLU.$(SUFFIX) stpsv_TLU.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-stpsv_TLN.$(SUFFIX) stpsv_TLN.$(PSUFFIX) : tpsv_U.c ../../param.h
+stpsv_TLN.$(SUFFIX) stpsv_TLN.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-stpsv_NLU.$(SUFFIX) stpsv_NLU.$(PSUFFIX) : tpsv_L.c ../../param.h
+stpsv_NLU.$(SUFFIX) stpsv_NLU.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-stpsv_NLN.$(SUFFIX) stpsv_NLN.$(PSUFFIX) : tpsv_L.c ../../param.h
+stpsv_NLN.$(SUFFIX) stpsv_NLN.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-stpsv_TUU.$(SUFFIX) stpsv_TUU.$(PSUFFIX) : tpsv_L.c ../../param.h
+stpsv_TUU.$(SUFFIX) stpsv_TUU.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-stpsv_TUN.$(SUFFIX) stpsv_TUN.$(PSUFFIX) : tpsv_L.c ../../param.h
+stpsv_TUN.$(SUFFIX) stpsv_TUN.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtpsv_NUU.$(SUFFIX) dtpsv_NUU.$(PSUFFIX) : tpsv_U.c ../../param.h
+dtpsv_NUU.$(SUFFIX) dtpsv_NUU.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtpsv_NUN.$(SUFFIX) dtpsv_NUN.$(PSUFFIX) : tpsv_U.c ../../param.h
+dtpsv_NUN.$(SUFFIX) dtpsv_NUN.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtpsv_TLU.$(SUFFIX) dtpsv_TLU.$(PSUFFIX) : tpsv_U.c ../../param.h
+dtpsv_TLU.$(SUFFIX) dtpsv_TLU.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtpsv_TLN.$(SUFFIX) dtpsv_TLN.$(PSUFFIX) : tpsv_U.c ../../param.h
+dtpsv_TLN.$(SUFFIX) dtpsv_TLN.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtpsv_NLU.$(SUFFIX) dtpsv_NLU.$(PSUFFIX) : tpsv_L.c ../../param.h
+dtpsv_NLU.$(SUFFIX) dtpsv_NLU.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtpsv_NLN.$(SUFFIX) dtpsv_NLN.$(PSUFFIX) : tpsv_L.c ../../param.h
+dtpsv_NLN.$(SUFFIX) dtpsv_NLN.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtpsv_TUU.$(SUFFIX) dtpsv_TUU.$(PSUFFIX) : tpsv_L.c ../../param.h
+dtpsv_TUU.$(SUFFIX) dtpsv_TUU.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtpsv_TUN.$(SUFFIX) dtpsv_TUN.$(PSUFFIX) : tpsv_L.c ../../param.h
+dtpsv_TUN.$(SUFFIX) dtpsv_TUN.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtpsv_NUU.$(SUFFIX) qtpsv_NUU.$(PSUFFIX) : tpsv_U.c ../../param.h
+qtpsv_NUU.$(SUFFIX) qtpsv_NUU.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtpsv_NUN.$(SUFFIX) qtpsv_NUN.$(PSUFFIX) : tpsv_U.c ../../param.h
+qtpsv_NUN.$(SUFFIX) qtpsv_NUN.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtpsv_TLU.$(SUFFIX) qtpsv_TLU.$(PSUFFIX) : tpsv_U.c ../../param.h
+qtpsv_TLU.$(SUFFIX) qtpsv_TLU.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtpsv_TLN.$(SUFFIX) qtpsv_TLN.$(PSUFFIX) : tpsv_U.c ../../param.h
+qtpsv_TLN.$(SUFFIX) qtpsv_TLN.$(PSUFFIX) : tpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtpsv_NLU.$(SUFFIX) qtpsv_NLU.$(PSUFFIX) : tpsv_L.c ../../param.h
+qtpsv_NLU.$(SUFFIX) qtpsv_NLU.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtpsv_NLN.$(SUFFIX) qtpsv_NLN.$(PSUFFIX) : tpsv_L.c ../../param.h
+qtpsv_NLN.$(SUFFIX) qtpsv_NLN.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtpsv_TUU.$(SUFFIX) qtpsv_TUU.$(PSUFFIX) : tpsv_L.c ../../param.h
+qtpsv_TUU.$(SUFFIX) qtpsv_TUU.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtpsv_TUN.$(SUFFIX) qtpsv_TUN.$(PSUFFIX) : tpsv_L.c ../../param.h
+qtpsv_TUN.$(SUFFIX) qtpsv_TUN.$(PSUFFIX) : tpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-ctpsv_NUU.$(SUFFIX) ctpsv_NUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ctpsv_NUU.$(SUFFIX) ctpsv_NUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-ctpsv_NUN.$(SUFFIX) ctpsv_NUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ctpsv_NUN.$(SUFFIX) ctpsv_NUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-ctpsv_TLU.$(SUFFIX) ctpsv_TLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ctpsv_TLU.$(SUFFIX) ctpsv_TLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-ctpsv_TLN.$(SUFFIX) ctpsv_TLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ctpsv_TLN.$(SUFFIX) ctpsv_TLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-ctpsv_RLU.$(SUFFIX) ctpsv_RLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ctpsv_RLU.$(SUFFIX) ctpsv_RLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-ctpsv_RLN.$(SUFFIX) ctpsv_RLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ctpsv_RLN.$(SUFFIX) ctpsv_RLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-ctpsv_CLU.$(SUFFIX) ctpsv_CLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ctpsv_CLU.$(SUFFIX) ctpsv_CLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-ctpsv_CLN.$(SUFFIX) ctpsv_CLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ctpsv_CLN.$(SUFFIX) ctpsv_CLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-ctpsv_NLU.$(SUFFIX) ctpsv_NLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ctpsv_NLU.$(SUFFIX) ctpsv_NLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-ctpsv_NLN.$(SUFFIX) ctpsv_NLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ctpsv_NLN.$(SUFFIX) ctpsv_NLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-ctpsv_TUU.$(SUFFIX) ctpsv_TUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ctpsv_TUU.$(SUFFIX) ctpsv_TUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-ctpsv_TUN.$(SUFFIX) ctpsv_TUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ctpsv_TUN.$(SUFFIX) ctpsv_TUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-ctpsv_RUU.$(SUFFIX) ctpsv_RUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ctpsv_RUU.$(SUFFIX) ctpsv_RUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-ctpsv_RUN.$(SUFFIX) ctpsv_RUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ctpsv_RUN.$(SUFFIX) ctpsv_RUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-ctpsv_CUU.$(SUFFIX) ctpsv_CUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ctpsv_CUU.$(SUFFIX) ctpsv_CUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-ctpsv_CUN.$(SUFFIX) ctpsv_CUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ctpsv_CUN.$(SUFFIX) ctpsv_CUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-ztpsv_NUU.$(SUFFIX) ztpsv_NUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ztpsv_NUU.$(SUFFIX) ztpsv_NUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-ztpsv_NUN.$(SUFFIX) ztpsv_NUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ztpsv_NUN.$(SUFFIX) ztpsv_NUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-ztpsv_TLU.$(SUFFIX) ztpsv_TLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ztpsv_TLU.$(SUFFIX) ztpsv_TLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-ztpsv_TLN.$(SUFFIX) ztpsv_TLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ztpsv_TLN.$(SUFFIX) ztpsv_TLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-ztpsv_RLU.$(SUFFIX) ztpsv_RLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ztpsv_RLU.$(SUFFIX) ztpsv_RLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-ztpsv_RLN.$(SUFFIX) ztpsv_RLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ztpsv_RLN.$(SUFFIX) ztpsv_RLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-ztpsv_CLU.$(SUFFIX) ztpsv_CLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ztpsv_CLU.$(SUFFIX) ztpsv_CLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-ztpsv_CLN.$(SUFFIX) ztpsv_CLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ztpsv_CLN.$(SUFFIX) ztpsv_CLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-ztpsv_NLU.$(SUFFIX) ztpsv_NLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ztpsv_NLU.$(SUFFIX) ztpsv_NLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-ztpsv_NLN.$(SUFFIX) ztpsv_NLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ztpsv_NLN.$(SUFFIX) ztpsv_NLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-ztpsv_TUU.$(SUFFIX) ztpsv_TUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ztpsv_TUU.$(SUFFIX) ztpsv_TUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-ztpsv_TUN.$(SUFFIX) ztpsv_TUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ztpsv_TUN.$(SUFFIX) ztpsv_TUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-ztpsv_RUU.$(SUFFIX) ztpsv_RUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ztpsv_RUU.$(SUFFIX) ztpsv_RUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-ztpsv_RUN.$(SUFFIX) ztpsv_RUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+ztpsv_RUN.$(SUFFIX) ztpsv_RUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-ztpsv_CUU.$(SUFFIX) ztpsv_CUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ztpsv_CUU.$(SUFFIX) ztpsv_CUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-ztpsv_CUN.$(SUFFIX) ztpsv_CUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+ztpsv_CUN.$(SUFFIX) ztpsv_CUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-xtpsv_NUU.$(SUFFIX) xtpsv_NUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+xtpsv_NUU.$(SUFFIX) xtpsv_NUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-xtpsv_NUN.$(SUFFIX) xtpsv_NUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+xtpsv_NUN.$(SUFFIX) xtpsv_NUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-xtpsv_TLU.$(SUFFIX) xtpsv_TLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+xtpsv_TLU.$(SUFFIX) xtpsv_TLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-xtpsv_TLN.$(SUFFIX) xtpsv_TLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+xtpsv_TLN.$(SUFFIX) xtpsv_TLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-xtpsv_RLU.$(SUFFIX) xtpsv_RLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+xtpsv_RLU.$(SUFFIX) xtpsv_RLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-xtpsv_RLN.$(SUFFIX) xtpsv_RLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+xtpsv_RLN.$(SUFFIX) xtpsv_RLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-xtpsv_CLU.$(SUFFIX) xtpsv_CLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+xtpsv_CLU.$(SUFFIX) xtpsv_CLU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-xtpsv_CLN.$(SUFFIX) xtpsv_CLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+xtpsv_CLN.$(SUFFIX) xtpsv_CLN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-xtpsv_NLU.$(SUFFIX) xtpsv_NLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+xtpsv_NLU.$(SUFFIX) xtpsv_NLU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-xtpsv_NLN.$(SUFFIX) xtpsv_NLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+xtpsv_NLN.$(SUFFIX) xtpsv_NLN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-xtpsv_TUU.$(SUFFIX) xtpsv_TUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+xtpsv_TUU.$(SUFFIX) xtpsv_TUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-xtpsv_TUN.$(SUFFIX) xtpsv_TUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+xtpsv_TUN.$(SUFFIX) xtpsv_TUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-xtpsv_RUU.$(SUFFIX) xtpsv_RUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
+xtpsv_RUU.$(SUFFIX) xtpsv_RUU.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-xtpsv_RUN.$(SUFFIX) xtpsv_RUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
+xtpsv_RUN.$(SUFFIX) xtpsv_RUN.$(PSUFFIX) : ztpsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-xtpsv_CUU.$(SUFFIX) xtpsv_CUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
+xtpsv_CUU.$(SUFFIX) xtpsv_CUU.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-xtpsv_CUN.$(SUFFIX) xtpsv_CUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
+xtpsv_CUN.$(SUFFIX) xtpsv_CUN.$(PSUFFIX) : ztpsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-strmv_NUU.$(SUFFIX) strmv_NUU.$(PSUFFIX) : trmv_U.c ../../common.h
+strmv_NUU.$(SUFFIX) strmv_NUU.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-strmv_NUN.$(SUFFIX) strmv_NUN.$(PSUFFIX) : trmv_U.c ../../common.h
+strmv_NUN.$(SUFFIX) strmv_NUN.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-strmv_TLU.$(SUFFIX) strmv_TLU.$(PSUFFIX) : trmv_U.c ../../common.h
+strmv_TLU.$(SUFFIX) strmv_TLU.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-strmv_TLN.$(SUFFIX) strmv_TLN.$(PSUFFIX) : trmv_U.c ../../common.h
+strmv_TLN.$(SUFFIX) strmv_TLN.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-strmv_NLU.$(SUFFIX) strmv_NLU.$(PSUFFIX) : trmv_L.c ../../common.h
+strmv_NLU.$(SUFFIX) strmv_NLU.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-strmv_NLN.$(SUFFIX) strmv_NLN.$(PSUFFIX) : trmv_L.c ../../common.h
+strmv_NLN.$(SUFFIX) strmv_NLN.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-strmv_TUU.$(SUFFIX) strmv_TUU.$(PSUFFIX) : trmv_L.c ../../common.h
+strmv_TUU.$(SUFFIX) strmv_TUU.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-strmv_TUN.$(SUFFIX) strmv_TUN.$(PSUFFIX) : trmv_L.c ../../common.h
+strmv_TUN.$(SUFFIX) strmv_TUN.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtrmv_NUU.$(SUFFIX) dtrmv_NUU.$(PSUFFIX) : trmv_U.c ../../common.h
+dtrmv_NUU.$(SUFFIX) dtrmv_NUU.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtrmv_NUN.$(SUFFIX) dtrmv_NUN.$(PSUFFIX) : trmv_U.c ../../common.h
+dtrmv_NUN.$(SUFFIX) dtrmv_NUN.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtrmv_TLU.$(SUFFIX) dtrmv_TLU.$(PSUFFIX) : trmv_U.c ../../common.h
+dtrmv_TLU.$(SUFFIX) dtrmv_TLU.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtrmv_TLN.$(SUFFIX) dtrmv_TLN.$(PSUFFIX) : trmv_U.c ../../common.h
+dtrmv_TLN.$(SUFFIX) dtrmv_TLN.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtrmv_NLU.$(SUFFIX) dtrmv_NLU.$(PSUFFIX) : trmv_L.c ../../common.h
+dtrmv_NLU.$(SUFFIX) dtrmv_NLU.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtrmv_NLN.$(SUFFIX) dtrmv_NLN.$(PSUFFIX) : trmv_L.c ../../common.h
+dtrmv_NLN.$(SUFFIX) dtrmv_NLN.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtrmv_TUU.$(SUFFIX) dtrmv_TUU.$(PSUFFIX) : trmv_L.c ../../common.h
+dtrmv_TUU.$(SUFFIX) dtrmv_TUU.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtrmv_TUN.$(SUFFIX) dtrmv_TUN.$(PSUFFIX) : trmv_L.c ../../common.h
+dtrmv_TUN.$(SUFFIX) dtrmv_TUN.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtrmv_NUU.$(SUFFIX) qtrmv_NUU.$(PSUFFIX) : trmv_U.c ../../common.h
+qtrmv_NUU.$(SUFFIX) qtrmv_NUU.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtrmv_NUN.$(SUFFIX) qtrmv_NUN.$(PSUFFIX) : trmv_U.c ../../common.h
+qtrmv_NUN.$(SUFFIX) qtrmv_NUN.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtrmv_TLU.$(SUFFIX) qtrmv_TLU.$(PSUFFIX) : trmv_U.c ../../common.h
+qtrmv_TLU.$(SUFFIX) qtrmv_TLU.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtrmv_TLN.$(SUFFIX) qtrmv_TLN.$(PSUFFIX) : trmv_U.c ../../common.h
+qtrmv_TLN.$(SUFFIX) qtrmv_TLN.$(PSUFFIX) : trmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtrmv_NLU.$(SUFFIX) qtrmv_NLU.$(PSUFFIX) : trmv_L.c ../../common.h
+qtrmv_NLU.$(SUFFIX) qtrmv_NLU.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtrmv_NLN.$(SUFFIX) qtrmv_NLN.$(PSUFFIX) : trmv_L.c ../../common.h
+qtrmv_NLN.$(SUFFIX) qtrmv_NLN.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtrmv_TUU.$(SUFFIX) qtrmv_TUU.$(PSUFFIX) : trmv_L.c ../../common.h
+qtrmv_TUU.$(SUFFIX) qtrmv_TUU.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtrmv_TUN.$(SUFFIX) qtrmv_TUN.$(PSUFFIX) : trmv_L.c ../../common.h
+qtrmv_TUN.$(SUFFIX) qtrmv_TUN.$(PSUFFIX) : trmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-ctrmv_NUU.$(SUFFIX) ctrmv_NUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ctrmv_NUU.$(SUFFIX) ctrmv_NUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ctrmv_NUN.$(SUFFIX) ctrmv_NUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ctrmv_NUN.$(SUFFIX) ctrmv_NUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ctrmv_TLU.$(SUFFIX) ctrmv_TLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ctrmv_TLU.$(SUFFIX) ctrmv_TLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ctrmv_TLN.$(SUFFIX) ctrmv_TLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ctrmv_TLN.$(SUFFIX) ctrmv_TLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ctrmv_RLU.$(SUFFIX) ctrmv_RLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ctrmv_RLU.$(SUFFIX) ctrmv_RLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ctrmv_RLN.$(SUFFIX) ctrmv_RLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ctrmv_RLN.$(SUFFIX) ctrmv_RLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ctrmv_CLU.$(SUFFIX) ctrmv_CLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ctrmv_CLU.$(SUFFIX) ctrmv_CLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ctrmv_CLN.$(SUFFIX) ctrmv_CLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ctrmv_CLN.$(SUFFIX) ctrmv_CLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ctrmv_NLU.$(SUFFIX) ctrmv_NLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ctrmv_NLU.$(SUFFIX) ctrmv_NLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ctrmv_NLN.$(SUFFIX) ctrmv_NLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ctrmv_NLN.$(SUFFIX) ctrmv_NLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ctrmv_TUU.$(SUFFIX) ctrmv_TUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ctrmv_TUU.$(SUFFIX) ctrmv_TUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ctrmv_TUN.$(SUFFIX) ctrmv_TUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ctrmv_TUN.$(SUFFIX) ctrmv_TUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ctrmv_RUU.$(SUFFIX) ctrmv_RUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ctrmv_RUU.$(SUFFIX) ctrmv_RUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ctrmv_RUN.$(SUFFIX) ctrmv_RUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ctrmv_RUN.$(SUFFIX) ctrmv_RUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ctrmv_CUU.$(SUFFIX) ctrmv_CUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ctrmv_CUU.$(SUFFIX) ctrmv_CUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ctrmv_CUN.$(SUFFIX) ctrmv_CUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ctrmv_CUN.$(SUFFIX) ctrmv_CUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ztrmv_NUU.$(SUFFIX) ztrmv_NUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ztrmv_NUU.$(SUFFIX) ztrmv_NUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ztrmv_NUN.$(SUFFIX) ztrmv_NUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ztrmv_NUN.$(SUFFIX) ztrmv_NUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ztrmv_TLU.$(SUFFIX) ztrmv_TLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ztrmv_TLU.$(SUFFIX) ztrmv_TLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ztrmv_TLN.$(SUFFIX) ztrmv_TLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ztrmv_TLN.$(SUFFIX) ztrmv_TLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ztrmv_RLU.$(SUFFIX) ztrmv_RLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ztrmv_RLU.$(SUFFIX) ztrmv_RLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ztrmv_RLN.$(SUFFIX) ztrmv_RLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ztrmv_RLN.$(SUFFIX) ztrmv_RLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ztrmv_CLU.$(SUFFIX) ztrmv_CLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ztrmv_CLU.$(SUFFIX) ztrmv_CLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ztrmv_CLN.$(SUFFIX) ztrmv_CLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ztrmv_CLN.$(SUFFIX) ztrmv_CLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-ztrmv_NLU.$(SUFFIX) ztrmv_NLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ztrmv_NLU.$(SUFFIX) ztrmv_NLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-ztrmv_NLN.$(SUFFIX) ztrmv_NLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ztrmv_NLN.$(SUFFIX) ztrmv_NLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-ztrmv_TUU.$(SUFFIX) ztrmv_TUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ztrmv_TUU.$(SUFFIX) ztrmv_TUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-ztrmv_TUN.$(SUFFIX) ztrmv_TUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ztrmv_TUN.$(SUFFIX) ztrmv_TUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-ztrmv_RUU.$(SUFFIX) ztrmv_RUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ztrmv_RUU.$(SUFFIX) ztrmv_RUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-ztrmv_RUN.$(SUFFIX) ztrmv_RUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+ztrmv_RUN.$(SUFFIX) ztrmv_RUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-ztrmv_CUU.$(SUFFIX) ztrmv_CUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ztrmv_CUU.$(SUFFIX) ztrmv_CUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-ztrmv_CUN.$(SUFFIX) ztrmv_CUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+ztrmv_CUN.$(SUFFIX) ztrmv_CUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-xtrmv_NUU.$(SUFFIX) xtrmv_NUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+xtrmv_NUU.$(SUFFIX) xtrmv_NUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-xtrmv_NUN.$(SUFFIX) xtrmv_NUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+xtrmv_NUN.$(SUFFIX) xtrmv_NUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-xtrmv_TLU.$(SUFFIX) xtrmv_TLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+xtrmv_TLU.$(SUFFIX) xtrmv_TLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-xtrmv_TLN.$(SUFFIX) xtrmv_TLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+xtrmv_TLN.$(SUFFIX) xtrmv_TLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-xtrmv_RLU.$(SUFFIX) xtrmv_RLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+xtrmv_RLU.$(SUFFIX) xtrmv_RLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-xtrmv_RLN.$(SUFFIX) xtrmv_RLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+xtrmv_RLN.$(SUFFIX) xtrmv_RLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-xtrmv_CLU.$(SUFFIX) xtrmv_CLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+xtrmv_CLU.$(SUFFIX) xtrmv_CLU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-xtrmv_CLN.$(SUFFIX) xtrmv_CLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+xtrmv_CLN.$(SUFFIX) xtrmv_CLN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-xtrmv_NLU.$(SUFFIX) xtrmv_NLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+xtrmv_NLU.$(SUFFIX) xtrmv_NLU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -DUNIT $< -o $(@F)
-xtrmv_NLN.$(SUFFIX) xtrmv_NLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+xtrmv_NLN.$(SUFFIX) xtrmv_NLN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=1 -UUNIT $< -o $(@F)
-xtrmv_TUU.$(SUFFIX) xtrmv_TUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+xtrmv_TUU.$(SUFFIX) xtrmv_TUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -DUNIT $< -o $(@F)
-xtrmv_TUN.$(SUFFIX) xtrmv_TUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+xtrmv_TUN.$(SUFFIX) xtrmv_TUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=2 -UUNIT $< -o $(@F)
-xtrmv_RUU.$(SUFFIX) xtrmv_RUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
+xtrmv_RUU.$(SUFFIX) xtrmv_RUU.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -DUNIT $< -o $(@F)
-xtrmv_RUN.$(SUFFIX) xtrmv_RUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
+xtrmv_RUN.$(SUFFIX) xtrmv_RUN.$(PSUFFIX) : ztrmv_U.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=3 -UUNIT $< -o $(@F)
-xtrmv_CUU.$(SUFFIX) xtrmv_CUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
+xtrmv_CUU.$(SUFFIX) xtrmv_CUU.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -DUNIT $< -o $(@F)
-xtrmv_CUN.$(SUFFIX) xtrmv_CUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
+xtrmv_CUN.$(SUFFIX) xtrmv_CUN.$(PSUFFIX) : ztrmv_L.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA=4 -UUNIT $< -o $(@F)
-strmv_thread_NUU.$(SUFFIX) strmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+strmv_thread_NUU.$(SUFFIX) strmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-strmv_thread_NUN.$(SUFFIX) strmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+strmv_thread_NUN.$(SUFFIX) strmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-strmv_thread_TLU.$(SUFFIX) strmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+strmv_thread_TLU.$(SUFFIX) strmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-strmv_thread_TLN.$(SUFFIX) strmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+strmv_thread_TLN.$(SUFFIX) strmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-strmv_thread_NLU.$(SUFFIX) strmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+strmv_thread_NLU.$(SUFFIX) strmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-strmv_thread_NLN.$(SUFFIX) strmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+strmv_thread_NLN.$(SUFFIX) strmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-strmv_thread_TUU.$(SUFFIX) strmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+strmv_thread_TUU.$(SUFFIX) strmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-strmv_thread_TUN.$(SUFFIX) strmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+strmv_thread_TUN.$(SUFFIX) strmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-dtrmv_thread_NUU.$(SUFFIX) dtrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+dtrmv_thread_NUU.$(SUFFIX) dtrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-dtrmv_thread_NUN.$(SUFFIX) dtrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+dtrmv_thread_NUN.$(SUFFIX) dtrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-dtrmv_thread_TLU.$(SUFFIX) dtrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+dtrmv_thread_TLU.$(SUFFIX) dtrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-dtrmv_thread_TLN.$(SUFFIX) dtrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+dtrmv_thread_TLN.$(SUFFIX) dtrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-dtrmv_thread_NLU.$(SUFFIX) dtrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+dtrmv_thread_NLU.$(SUFFIX) dtrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-dtrmv_thread_NLN.$(SUFFIX) dtrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+dtrmv_thread_NLN.$(SUFFIX) dtrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-dtrmv_thread_TUU.$(SUFFIX) dtrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+dtrmv_thread_TUU.$(SUFFIX) dtrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-dtrmv_thread_TUN.$(SUFFIX) dtrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+dtrmv_thread_TUN.$(SUFFIX) dtrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-qtrmv_thread_NUU.$(SUFFIX) qtrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+qtrmv_thread_NUU.$(SUFFIX) qtrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -UTRANSA -DUNIT $< -o $(@F)
-qtrmv_thread_NUN.$(SUFFIX) qtrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+qtrmv_thread_NUN.$(SUFFIX) qtrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -UTRANSA -UUNIT $< -o $(@F)
-qtrmv_thread_TLU.$(SUFFIX) qtrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+qtrmv_thread_TLU.$(SUFFIX) qtrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -DTRANSA -DUNIT $< -o $(@F)
-qtrmv_thread_TLN.$(SUFFIX) qtrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+qtrmv_thread_TLN.$(SUFFIX) qtrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -DTRANSA -UUNIT $< -o $(@F)
-qtrmv_thread_NLU.$(SUFFIX) qtrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+qtrmv_thread_NLU.$(SUFFIX) qtrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -UTRANSA -DUNIT $< -o $(@F)
-qtrmv_thread_NLN.$(SUFFIX) qtrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+qtrmv_thread_NLN.$(SUFFIX) qtrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER -UTRANSA -UUNIT $< -o $(@F)
-qtrmv_thread_TUU.$(SUFFIX) qtrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+qtrmv_thread_TUU.$(SUFFIX) qtrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -DTRANSA -DUNIT $< -o $(@F)
-qtrmv_thread_TUN.$(SUFFIX) qtrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+qtrmv_thread_TUN.$(SUFFIX) qtrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER -DTRANSA -UUNIT $< -o $(@F)
-ctrmv_thread_NUU.$(SUFFIX) ctrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_NUU.$(SUFFIX) ctrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ctrmv_thread_NUN.$(SUFFIX) ctrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_NUN.$(SUFFIX) ctrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ctrmv_thread_TLU.$(SUFFIX) ctrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_TLU.$(SUFFIX) ctrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ctrmv_thread_TLN.$(SUFFIX) ctrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_TLN.$(SUFFIX) ctrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ctrmv_thread_RLU.$(SUFFIX) ctrmv_thread_RLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_RLU.$(SUFFIX) ctrmv_thread_RLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ctrmv_thread_RLN.$(SUFFIX) ctrmv_thread_RLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_RLN.$(SUFFIX) ctrmv_thread_RLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ctrmv_thread_CLU.$(SUFFIX) ctrmv_thread_CLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_CLU.$(SUFFIX) ctrmv_thread_CLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ctrmv_thread_CLN.$(SUFFIX) ctrmv_thread_CLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_CLN.$(SUFFIX) ctrmv_thread_CLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ctrmv_thread_NLU.$(SUFFIX) ctrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_NLU.$(SUFFIX) ctrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ctrmv_thread_NLN.$(SUFFIX) ctrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_NLN.$(SUFFIX) ctrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ctrmv_thread_TUU.$(SUFFIX) ctrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_TUU.$(SUFFIX) ctrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ctrmv_thread_TUN.$(SUFFIX) ctrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_TUN.$(SUFFIX) ctrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ctrmv_thread_RUU.$(SUFFIX) ctrmv_thread_RUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_RUU.$(SUFFIX) ctrmv_thread_RUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ctrmv_thread_RUN.$(SUFFIX) ctrmv_thread_RUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_RUN.$(SUFFIX) ctrmv_thread_RUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ctrmv_thread_CUU.$(SUFFIX) ctrmv_thread_CUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_CUU.$(SUFFIX) ctrmv_thread_CUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ctrmv_thread_CUN.$(SUFFIX) ctrmv_thread_CUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ctrmv_thread_CUN.$(SUFFIX) ctrmv_thread_CUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ztrmv_thread_NUU.$(SUFFIX) ztrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_NUU.$(SUFFIX) ztrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ztrmv_thread_NUN.$(SUFFIX) ztrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_NUN.$(SUFFIX) ztrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ztrmv_thread_TLU.$(SUFFIX) ztrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_TLU.$(SUFFIX) ztrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ztrmv_thread_TLN.$(SUFFIX) ztrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_TLN.$(SUFFIX) ztrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ztrmv_thread_RLU.$(SUFFIX) ztrmv_thread_RLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_RLU.$(SUFFIX) ztrmv_thread_RLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ztrmv_thread_RLN.$(SUFFIX) ztrmv_thread_RLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_RLN.$(SUFFIX) ztrmv_thread_RLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ztrmv_thread_CLU.$(SUFFIX) ztrmv_thread_CLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_CLU.$(SUFFIX) ztrmv_thread_CLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ztrmv_thread_CLN.$(SUFFIX) ztrmv_thread_CLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_CLN.$(SUFFIX) ztrmv_thread_CLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-ztrmv_thread_NLU.$(SUFFIX) ztrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_NLU.$(SUFFIX) ztrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-ztrmv_thread_NLN.$(SUFFIX) ztrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_NLN.$(SUFFIX) ztrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-ztrmv_thread_TUU.$(SUFFIX) ztrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_TUU.$(SUFFIX) ztrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-ztrmv_thread_TUN.$(SUFFIX) ztrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_TUN.$(SUFFIX) ztrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-ztrmv_thread_RUU.$(SUFFIX) ztrmv_thread_RUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_RUU.$(SUFFIX) ztrmv_thread_RUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-ztrmv_thread_RUN.$(SUFFIX) ztrmv_thread_RUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_RUN.$(SUFFIX) ztrmv_thread_RUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-ztrmv_thread_CUU.$(SUFFIX) ztrmv_thread_CUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_CUU.$(SUFFIX) ztrmv_thread_CUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-ztrmv_thread_CUN.$(SUFFIX) ztrmv_thread_CUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+ztrmv_thread_CUN.$(SUFFIX) ztrmv_thread_CUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-xtrmv_thread_NUU.$(SUFFIX) xtrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_NUU.$(SUFFIX) xtrmv_thread_NUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-xtrmv_thread_NUN.$(SUFFIX) xtrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_NUN.$(SUFFIX) xtrmv_thread_NUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-xtrmv_thread_TLU.$(SUFFIX) xtrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_TLU.$(SUFFIX) xtrmv_thread_TLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-xtrmv_thread_TLN.$(SUFFIX) xtrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_TLN.$(SUFFIX) xtrmv_thread_TLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-xtrmv_thread_RLU.$(SUFFIX) xtrmv_thread_RLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_RLU.$(SUFFIX) xtrmv_thread_RLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-xtrmv_thread_RLN.$(SUFFIX) xtrmv_thread_RLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_RLN.$(SUFFIX) xtrmv_thread_RLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-xtrmv_thread_CLU.$(SUFFIX) xtrmv_thread_CLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_CLU.$(SUFFIX) xtrmv_thread_CLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-xtrmv_thread_CLN.$(SUFFIX) xtrmv_thread_CLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_CLN.$(SUFFIX) xtrmv_thread_CLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-xtrmv_thread_NLU.$(SUFFIX) xtrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_NLU.$(SUFFIX) xtrmv_thread_NLU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=1 -DUNIT $< -o $(@F)
-xtrmv_thread_NLN.$(SUFFIX) xtrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_NLN.$(SUFFIX) xtrmv_thread_NLN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DTRANSA=1 -UUNIT $< -o $(@F)
-xtrmv_thread_TUU.$(SUFFIX) xtrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_TUU.$(SUFFIX) xtrmv_thread_TUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=2 -DUNIT $< -o $(@F)
-xtrmv_thread_TUN.$(SUFFIX) xtrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_TUN.$(SUFFIX) xtrmv_thread_TUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=2 -UUNIT $< -o $(@F)
-xtrmv_thread_RUU.$(SUFFIX) xtrmv_thread_RUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_RUU.$(SUFFIX) xtrmv_thread_RUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=3 -DUNIT $< -o $(@F)
-xtrmv_thread_RUN.$(SUFFIX) xtrmv_thread_RUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_RUN.$(SUFFIX) xtrmv_thread_RUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=3 -UUNIT $< -o $(@F)
-xtrmv_thread_CUU.$(SUFFIX) xtrmv_thread_CUU.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_CUU.$(SUFFIX) xtrmv_thread_CUU.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=4 -DUNIT $< -o $(@F)
-xtrmv_thread_CUN.$(SUFFIX) xtrmv_thread_CUN.$(PSUFFIX) : trmv_thread.c ../../common.h
+xtrmv_thread_CUN.$(SUFFIX) xtrmv_thread_CUN.$(PSUFFIX) : trmv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DTRANSA=4 -UUNIT $< -o $(@F)
-strsv_NUU.$(SUFFIX) strsv_NUU.$(PSUFFIX) : trsv_U.c ../../param.h
+strsv_NUU.$(SUFFIX) strsv_NUU.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-strsv_NUN.$(SUFFIX) strsv_NUN.$(PSUFFIX) : trsv_U.c ../../param.h
+strsv_NUN.$(SUFFIX) strsv_NUN.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-strsv_TLU.$(SUFFIX) strsv_TLU.$(PSUFFIX) : trsv_U.c ../../param.h
+strsv_TLU.$(SUFFIX) strsv_TLU.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-strsv_TLN.$(SUFFIX) strsv_TLN.$(PSUFFIX) : trsv_U.c ../../param.h
+strsv_TLN.$(SUFFIX) strsv_TLN.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-strsv_NLU.$(SUFFIX) strsv_NLU.$(PSUFFIX) : trsv_L.c ../../param.h
+strsv_NLU.$(SUFFIX) strsv_NLU.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-strsv_NLN.$(SUFFIX) strsv_NLN.$(PSUFFIX) : trsv_L.c ../../param.h
+strsv_NLN.$(SUFFIX) strsv_NLN.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-strsv_TUU.$(SUFFIX) strsv_TUU.$(PSUFFIX) : trsv_L.c ../../param.h
+strsv_TUU.$(SUFFIX) strsv_TUU.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-strsv_TUN.$(SUFFIX) strsv_TUN.$(PSUFFIX) : trsv_L.c ../../param.h
+strsv_TUN.$(SUFFIX) strsv_TUN.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtrsv_NUU.$(SUFFIX) dtrsv_NUU.$(PSUFFIX) : trsv_U.c ../../param.h
+dtrsv_NUU.$(SUFFIX) dtrsv_NUU.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtrsv_NUN.$(SUFFIX) dtrsv_NUN.$(PSUFFIX) : trsv_U.c ../../param.h
+dtrsv_NUN.$(SUFFIX) dtrsv_NUN.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtrsv_TLU.$(SUFFIX) dtrsv_TLU.$(PSUFFIX) : trsv_U.c ../../param.h
+dtrsv_TLU.$(SUFFIX) dtrsv_TLU.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtrsv_TLN.$(SUFFIX) dtrsv_TLN.$(PSUFFIX) : trsv_U.c ../../param.h
+dtrsv_TLN.$(SUFFIX) dtrsv_TLN.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-dtrsv_NLU.$(SUFFIX) dtrsv_NLU.$(PSUFFIX) : trsv_L.c ../../param.h
+dtrsv_NLU.$(SUFFIX) dtrsv_NLU.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-dtrsv_NLN.$(SUFFIX) dtrsv_NLN.$(PSUFFIX) : trsv_L.c ../../param.h
+dtrsv_NLN.$(SUFFIX) dtrsv_NLN.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-dtrsv_TUU.$(SUFFIX) dtrsv_TUU.$(PSUFFIX) : trsv_L.c ../../param.h
+dtrsv_TUU.$(SUFFIX) dtrsv_TUU.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-dtrsv_TUN.$(SUFFIX) dtrsv_TUN.$(PSUFFIX) : trsv_L.c ../../param.h
+dtrsv_TUN.$(SUFFIX) dtrsv_TUN.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtrsv_NUU.$(SUFFIX) qtrsv_NUU.$(PSUFFIX) : trsv_U.c ../../param.h
+qtrsv_NUU.$(SUFFIX) qtrsv_NUU.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtrsv_NUN.$(SUFFIX) qtrsv_NUN.$(PSUFFIX) : trsv_U.c ../../param.h
+qtrsv_NUN.$(SUFFIX) qtrsv_NUN.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtrsv_TLU.$(SUFFIX) qtrsv_TLU.$(PSUFFIX) : trsv_U.c ../../param.h
+qtrsv_TLU.$(SUFFIX) qtrsv_TLU.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtrsv_TLN.$(SUFFIX) qtrsv_TLN.$(PSUFFIX) : trsv_U.c ../../param.h
+qtrsv_TLN.$(SUFFIX) qtrsv_TLN.$(PSUFFIX) : trsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-qtrsv_NLU.$(SUFFIX) qtrsv_NLU.$(PSUFFIX) : trsv_L.c ../../param.h
+qtrsv_NLU.$(SUFFIX) qtrsv_NLU.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UTRANSA -DUNIT $< -o $(@F)
-qtrsv_NLN.$(SUFFIX) qtrsv_NLN.$(PSUFFIX) : trsv_L.c ../../param.h
+qtrsv_NLN.$(SUFFIX) qtrsv_NLN.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UTRANSA -UUNIT $< -o $(@F)
-qtrsv_TUU.$(SUFFIX) qtrsv_TUU.$(PSUFFIX) : trsv_L.c ../../param.h
+qtrsv_TUU.$(SUFFIX) qtrsv_TUU.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DTRANSA -DUNIT $< -o $(@F)
-qtrsv_TUN.$(SUFFIX) qtrsv_TUN.$(PSUFFIX) : trsv_L.c ../../param.h
+qtrsv_TUN.$(SUFFIX) qtrsv_TUN.$(PSUFFIX) : trsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DTRANSA -UUNIT $< -o $(@F)
-ctrsv_NUU.$(SUFFIX) ctrsv_NUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ctrsv_NUU.$(SUFFIX) ctrsv_NUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-ctrsv_NUN.$(SUFFIX) ctrsv_NUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ctrsv_NUN.$(SUFFIX) ctrsv_NUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-ctrsv_TLU.$(SUFFIX) ctrsv_TLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ctrsv_TLU.$(SUFFIX) ctrsv_TLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-ctrsv_TLN.$(SUFFIX) ctrsv_TLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ctrsv_TLN.$(SUFFIX) ctrsv_TLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-ctrsv_RLU.$(SUFFIX) ctrsv_RLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ctrsv_RLU.$(SUFFIX) ctrsv_RLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-ctrsv_RLN.$(SUFFIX) ctrsv_RLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ctrsv_RLN.$(SUFFIX) ctrsv_RLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-ctrsv_CLU.$(SUFFIX) ctrsv_CLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ctrsv_CLU.$(SUFFIX) ctrsv_CLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-ctrsv_CLN.$(SUFFIX) ctrsv_CLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ctrsv_CLN.$(SUFFIX) ctrsv_CLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-ctrsv_NLU.$(SUFFIX) ctrsv_NLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ctrsv_NLU.$(SUFFIX) ctrsv_NLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-ctrsv_NLN.$(SUFFIX) ctrsv_NLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ctrsv_NLN.$(SUFFIX) ctrsv_NLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-ctrsv_TUU.$(SUFFIX) ctrsv_TUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ctrsv_TUU.$(SUFFIX) ctrsv_TUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-ctrsv_TUN.$(SUFFIX) ctrsv_TUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ctrsv_TUN.$(SUFFIX) ctrsv_TUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-ctrsv_RUU.$(SUFFIX) ctrsv_RUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ctrsv_RUU.$(SUFFIX) ctrsv_RUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-ctrsv_RUN.$(SUFFIX) ctrsv_RUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ctrsv_RUN.$(SUFFIX) ctrsv_RUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-ctrsv_CUU.$(SUFFIX) ctrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ctrsv_CUU.$(SUFFIX) ctrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-ctrsv_CUN.$(SUFFIX) ctrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ctrsv_CUN.$(SUFFIX) ctrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-ztrsv_NUU.$(SUFFIX) ztrsv_NUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ztrsv_NUU.$(SUFFIX) ztrsv_NUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-ztrsv_NUN.$(SUFFIX) ztrsv_NUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ztrsv_NUN.$(SUFFIX) ztrsv_NUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-ztrsv_TLU.$(SUFFIX) ztrsv_TLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ztrsv_TLU.$(SUFFIX) ztrsv_TLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-ztrsv_TLN.$(SUFFIX) ztrsv_TLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ztrsv_TLN.$(SUFFIX) ztrsv_TLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-ztrsv_RLU.$(SUFFIX) ztrsv_RLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ztrsv_RLU.$(SUFFIX) ztrsv_RLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-ztrsv_RLN.$(SUFFIX) ztrsv_RLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ztrsv_RLN.$(SUFFIX) ztrsv_RLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-ztrsv_CLU.$(SUFFIX) ztrsv_CLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ztrsv_CLU.$(SUFFIX) ztrsv_CLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-ztrsv_CLN.$(SUFFIX) ztrsv_CLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ztrsv_CLN.$(SUFFIX) ztrsv_CLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-ztrsv_NLU.$(SUFFIX) ztrsv_NLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ztrsv_NLU.$(SUFFIX) ztrsv_NLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-ztrsv_NLN.$(SUFFIX) ztrsv_NLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ztrsv_NLN.$(SUFFIX) ztrsv_NLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-ztrsv_TUU.$(SUFFIX) ztrsv_TUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ztrsv_TUU.$(SUFFIX) ztrsv_TUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-ztrsv_TUN.$(SUFFIX) ztrsv_TUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ztrsv_TUN.$(SUFFIX) ztrsv_TUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-ztrsv_RUU.$(SUFFIX) ztrsv_RUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ztrsv_RUU.$(SUFFIX) ztrsv_RUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-ztrsv_RUN.$(SUFFIX) ztrsv_RUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+ztrsv_RUN.$(SUFFIX) ztrsv_RUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-ztrsv_CUU.$(SUFFIX) ztrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ztrsv_CUU.$(SUFFIX) ztrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-ztrsv_CUN.$(SUFFIX) ztrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+ztrsv_CUN.$(SUFFIX) ztrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-xtrsv_NUU.$(SUFFIX) xtrsv_NUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+xtrsv_NUU.$(SUFFIX) xtrsv_NUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-xtrsv_NUN.$(SUFFIX) xtrsv_NUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+xtrsv_NUN.$(SUFFIX) xtrsv_NUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-xtrsv_TLU.$(SUFFIX) xtrsv_TLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+xtrsv_TLU.$(SUFFIX) xtrsv_TLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-xtrsv_TLN.$(SUFFIX) xtrsv_TLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+xtrsv_TLN.$(SUFFIX) xtrsv_TLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-xtrsv_RLU.$(SUFFIX) xtrsv_RLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+xtrsv_RLU.$(SUFFIX) xtrsv_RLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-xtrsv_RLN.$(SUFFIX) xtrsv_RLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+xtrsv_RLN.$(SUFFIX) xtrsv_RLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-xtrsv_CLU.$(SUFFIX) xtrsv_CLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+xtrsv_CLU.$(SUFFIX) xtrsv_CLU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-xtrsv_CLN.$(SUFFIX) xtrsv_CLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+xtrsv_CLN.$(SUFFIX) xtrsv_CLN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
-xtrsv_NLU.$(SUFFIX) xtrsv_NLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+xtrsv_NLU.$(SUFFIX) xtrsv_NLU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=1 -DUNIT $< -o $(@F)
-xtrsv_NLN.$(SUFFIX) xtrsv_NLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+xtrsv_NLN.$(SUFFIX) xtrsv_NLN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=1 -UUNIT $< -o $(@F)
-xtrsv_TUU.$(SUFFIX) xtrsv_TUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+xtrsv_TUU.$(SUFFIX) xtrsv_TUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=2 -DUNIT $< -o $(@F)
-xtrsv_TUN.$(SUFFIX) xtrsv_TUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+xtrsv_TUN.$(SUFFIX) xtrsv_TUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=2 -UUNIT $< -o $(@F)
-xtrsv_RUU.$(SUFFIX) xtrsv_RUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
+xtrsv_RUU.$(SUFFIX) xtrsv_RUU.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=3 -DUNIT $< -o $(@F)
-xtrsv_RUN.$(SUFFIX) xtrsv_RUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
+xtrsv_RUN.$(SUFFIX) xtrsv_RUN.$(PSUFFIX) : ztrsv_U.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=3 -UUNIT $< -o $(@F)
-xtrsv_CUU.$(SUFFIX) xtrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
+xtrsv_CUU.$(SUFFIX) xtrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -DUNIT $< -o $(@F)
-xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
+xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
include ../../Makefile.tail
diff --git a/driver/level2/gbmv_k.c b/driver/level2/gbmv_k.c
index 317d42047..4b29d70d1 100644
--- a/driver/level2/gbmv_k.c
+++ b/driver/level2/gbmv_k.c
@@ -84,12 +84,12 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha,
#ifndef TRANS
AXPYU_K(length, 0, 0,
- alpha * X[i],
+ alpha * X[i],
a + start, 1, Y + start - offset_u, 1, NULL, 0);
#else
Y[i] += alpha * DOTU_K(length, a + start, 1, X + start - offset_u, 1);
#endif
-
+
offset_u --;
offset_l --;
diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c
index 18aae26ae..9efe17092 100644
--- a/driver/level2/gbmv_thread.c
+++ b/driver/level2/gbmv_thread.c
@@ -105,13 +105,13 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
args -> m,
#else
args -> n,
-#endif
- 0, 0, ZERO,
+#endif
+ 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y, 1, NULL, 0, NULL, 0);
-
+ y, 1, NULL, 0, NULL, 0);
+
offset_u = ku - n_from;
offset_l = ku - n_from + args -> m;
@@ -157,7 +157,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
x += COMPSIZE;
#endif
-
+
y += COMPSIZE;
offset_u --;
@@ -190,7 +190,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -198,27 +198,27 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
args.n = n;
-
+
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)buffer;
-
+
args.lda = lda;
args.ldb = incx;
args.ldc = ku;
args.ldd = kl;
num_cpu = 0;
-
+
range_n[0] = 0;
i = n;
-
+
while (i > 0){
width = blas_quickdivide(i + nthreads - num_cpu - 1, nthreads - num_cpu);
@@ -227,7 +227,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
if (i < width) width = i;
range_n[num_cpu + 1] = range_n[num_cpu] + width;
-
+
#ifndef TRANSA
range_m[num_cpu] = num_cpu * ((m + 15) & ~15);
#else
@@ -242,7 +242,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i -= width;
}
@@ -254,12 +254,12 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
#else
queue[0].sb = buffer + num_cpu * (((n + 255) & ~255) + 16) * COMPSIZE;
#endif
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
for (i = 1; i < num_cpu; i ++) {
AXPYU_K(
#ifndef TRANSA
diff --git a/driver/level2/gemv_thread.c b/driver/level2/gemv_thread.c
index 5f8abf26f..ddd475367 100644
--- a/driver/level2/gemv_thread.c
+++ b/driver/level2/gemv_thread.c
@@ -110,7 +110,7 @@ static int gemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
*((FLOAT *)args -> alpha + 1),
#endif
a, lda, x, incx, y, incy, buffer);
-
+
return 0;
}
@@ -134,7 +134,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -142,17 +142,17 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
args.n = n;
-
+
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)y;
-
+
args.lda = lda;
args.ldb = incx;
args.ldc = incy;
@@ -164,14 +164,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
#endif
num_cpu = 0;
-
+
range[0] = 0;
#ifndef TRANSA
i = m;
#else
i = n;
#endif
-
+
while (i > 0){
width = blas_quickdivide(i + nthreads - num_cpu - 1, nthreads - num_cpu);
@@ -179,7 +179,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
if (i < width) width = i;
range[num_cpu + 1] = range[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = gemv_kernel;
queue[num_cpu].args = &args;
@@ -193,7 +193,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i -= width;
}
@@ -202,9 +202,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
queue[0].sa = NULL;
queue[0].sb = buffer;
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
return 0;
}
diff --git a/driver/level2/ger_thread.c b/driver/level2/ger_thread.c
index 9e2f520ef..0a5e14cef 100644
--- a/driver/level2/ger_thread.c
+++ b/driver/level2/ger_thread.c
@@ -102,7 +102,7 @@ static int ger_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#endif
#endif
x, 1, a, 1, NULL, 0);
-
+
y += incy * COMPSIZE;
a += lda * COMPSIZE;
}
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -138,17 +138,17 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
args.n = n;
-
+
args.a = (void *)x;
args.b = (void *)y;
args.c = (void *)a;
-
+
args.lda = incx;
args.ldb = incy;
args.ldc = lda;
@@ -160,18 +160,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *
#endif
num_cpu = 0;
-
+
range_n[0] = 0;
i = n;
-
+
while (i > 0){
-
+
width = blas_quickdivide(i + nthreads - num_cpu - 1, nthreads - num_cpu);
if (width < 4) width = 4;
if (i < width) width = i;
range_n[num_cpu + 1] = range_n[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = ger_kernel;
queue[num_cpu].args = &args;
@@ -179,19 +179,19 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i -= width;
}
-
+
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
return 0;
}
diff --git a/driver/level2/sbmv_k.c b/driver/level2/sbmv_k.c
index d0adc678a..ef7fa378c 100644
--- a/driver/level2/sbmv_k.c
+++ b/driver/level2/sbmv_k.c
@@ -72,7 +72,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha,
if (length > k) length = k;
AXPYU_K(length + 1, 0, 0,
- alpha * X[i],
+ alpha * X[i],
a + k - length, 1, Y + i - length, 1, NULL, 0);
Y[i] += alpha * DOTU_K(length, a + k - length, 1, X + i - length, 1);
#else
@@ -80,11 +80,11 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha,
if (n - i - 1 < k) length = n - i - 1;
AXPYU_K(length + 1, 0, 0,
- alpha * X[i],
+ alpha * X[i],
a, 1, Y + i, 1, NULL, 0);
Y[i] += alpha * DOTU_K(length, a + 1, 1, X + i + 1, 1);
#endif
-
+
a += lda;
}
diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c
index 7dfabfa81..5b7fc7332 100644
--- a/driver/level2/sbmv_thread.c
+++ b/driver/level2/sbmv_thread.c
@@ -76,7 +76,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
n_to = n;
//Use y as each thread's n* COMPSIZE elements in sb buffer
- y = buffer;
+ y = buffer;
buffer += ((COMPSIZE * n + 1023) & ~1023);
if (range_m) {
@@ -94,12 +94,12 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
buffer += ((COMPSIZE * n + 1023) & ~1023);
}
- SCAL_K(n, 0, 0, ZERO,
+ SCAL_K(n, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y, 1, NULL, 0, NULL, 0);
-
+ y, 1, NULL, 0, NULL, 0);
+
for (i = n_from; i < n_to; i++) {
#ifndef LOWER
@@ -193,7 +193,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -201,52 +201,52 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.n = n;
args.k = k;
-
+
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)buffer;
-
+
args.lda = lda;
args.ldb = incx;
args.ldc = incy;
dnum = (double)n * (double)n / (double)nthreads;
num_cpu = 0;
-
+
if (n < 2 * k) {
#ifndef LOWER
range_m[MAX_CPU_NUMBER] = n;
i = 0;
-
+
while (i < n){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(n - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
} else {
width = n - i;
}
-
+
if (width < 16) width = 16;
if (width > n - i) width = n - i;
-
+
} else {
width = n - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
queue[num_cpu].args = &args;
@@ -255,37 +255,37 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
-
+
range_m[0] = 0;
i = 0;
-
+
while (i < n){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(n - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
} else {
width = n - i;
}
-
+
if (width < 16) width = 16;
if (width > n - i) width = n - i;
-
+
} else {
width = n - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
queue[num_cpu].args = &args;
@@ -294,29 +294,29 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
-
+
} else {
-
+
range_m[0] = 0;
i = n;
-
+
while (i > 0){
-
+
width = blas_quickdivide(i + nthreads - num_cpu - 1, nthreads - num_cpu);
-
+
if (width < 4) width = 4;
if (i < width) width = i;
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
-
+
range_n[num_cpu] = num_cpu * ((n + 15) & ~15);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
queue[num_cpu].args = &args;
@@ -325,7 +325,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i -= width;
}
@@ -335,10 +335,10 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
queue[0].sa = NULL;
queue[0].sb = buffer;
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
for (i = 1; i < num_cpu; i ++) {
AXPYU_K(n, 0, 0,
#ifndef COMPLEX
@@ -356,6 +356,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
alpha[0], alpha[1],
#endif
buffer, 1, y, incy, NULL, 0);
-
+
return 0;
}
diff --git a/driver/level2/spmv_k.c b/driver/level2/spmv_k.c
index 07ec66095..8ce0abdf7 100644
--- a/driver/level2/spmv_k.c
+++ b/driver/level2/spmv_k.c
@@ -68,7 +68,7 @@ int CNAME(BLASLONG m, FLOAT alpha, FLOAT *a,
if (i > 0) Y[i] += alpha * DOTU_K(i, a, 1, X, 1);
AXPYU_K(i + 1, 0, 0, alpha * X[i], a, 1, Y, 1, NULL, 0);
a += i + 1;
-
+
#else
Y[i] += alpha * DOTU_K(m - i, a + i, 1, X + i, 1);
if (m - i > 1) AXPYU_K(m - i - 1, 0, 0, alpha * X[i],
diff --git a/driver/level2/spmv_thread.c b/driver/level2/spmv_thread.c
index 7717bbf2b..93a2f44d4 100644
--- a/driver/level2/spmv_thread.c
+++ b/driver/level2/spmv_thread.c
@@ -91,17 +91,17 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
}
#ifndef LOWER
- SCAL_K(m_to, 0, 0, ZERO,
+ SCAL_K(m_to, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y, 1, NULL, 0, NULL, 0);
+ y, 1, NULL, 0, NULL, 0);
#else
- SCAL_K(args -> m - m_from, 0, 0, ZERO,
+ SCAL_K(args -> m - m_from, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
+ y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
#endif
#ifndef LOWER
@@ -139,7 +139,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
a, 1, y, 1, NULL, 0);
a += (i + 1) * COMPSIZE;
-
+
#else
#if !defined(HEMV) && !defined(HEMVREV)
result = MYDOT(args -> m - i , a + i * COMPSIZE, 1, x + i * COMPSIZE, 1);
@@ -198,7 +198,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -206,31 +206,31 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
-
+
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)buffer;
-
+
args.ldb = incx;
args.ldc = incy;
dnum = (double)m * (double)m / (double)nthreads;
num_cpu = 0;
-
+
#ifndef LOWER
range_m[MAX_CPU_NUMBER] = m;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -240,14 +240,14 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = spmv_kernel;
queue[num_cpu].args = &args;
@@ -256,20 +256,20 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -279,14 +279,14 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = spmv_kernel;
queue[num_cpu].args = &args;
@@ -295,44 +295,44 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
for (i = 1; i < num_cpu; i ++) {
-
+
#ifndef LOWER
-
+
AXPYU_K(range_m[MAX_CPU_NUMBER - i], 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + range_n[i] * COMPSIZE, 1, buffer, 1, NULL, 0);
-
+
#else
-
+
AXPYU_K(m - range_m[i], 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + (range_n[i] + range_m[i]) * COMPSIZE, 1, buffer + range_m[i] * COMPSIZE, 1, NULL, 0);
-
+
#endif
-
+
}
-
+
AXPYU_K(m, 0, 0,
#ifndef COMPLEX
alpha,
@@ -340,6 +340,6 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
alpha[0], alpha[1],
#endif
buffer, 1, y, incy, NULL, 0);
-
+
return 0;
}
diff --git a/driver/level2/spr2_k.c b/driver/level2/spr2_k.c
index 58e14ebe2..e742b246f 100644
--- a/driver/level2/spr2_k.c
+++ b/driver/level2/spr2_k.c
@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
-int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT *x, BLASLONG incx,
+int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT *x, BLASLONG incx,
FLOAT *y, BLASLONG incy, FLOAT *a, FLOAT *buffer){
BLASLONG i;
diff --git a/driver/level2/spr2_thread.c b/driver/level2/spr2_thread.c
index b20eb055a..10edb1eb1 100644
--- a/driver/level2/spr2_thread.c
+++ b/driver/level2/spr2_thread.c
@@ -116,7 +116,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#else
if ((x[i * COMPSIZE + 0] != ZERO) || (x[i * COMPSIZE + 1] != ZERO)) {
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0] - alpha_i * x[i * COMPSIZE + 1],
alpha_i * x[i * COMPSIZE + 0] + alpha_r * x[i * COMPSIZE + 1],
y, 1, a, 1, NULL, 0);
@@ -129,7 +129,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
}
if ((y[i * COMPSIZE + 0] != ZERO) || (y[i * COMPSIZE + 1] != ZERO)) {
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * y[i * COMPSIZE + 0] - alpha_i * y[i * COMPSIZE + 1],
alpha_i * y[i * COMPSIZE + 0] + alpha_r * y[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
@@ -145,7 +145,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
if ((x[i * COMPSIZE + 0] != ZERO) || (x[i * COMPSIZE + 1] != ZERO)) {
#ifndef HEMVREV
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0] - alpha_i * x[i * COMPSIZE + 1],
- alpha_i * x[i * COMPSIZE + 0] - alpha_r * x[i * COMPSIZE + 1],
y, 1, a, 1, NULL, 0);
@@ -157,7 +157,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#endif
#else
#ifndef LOWER
- AXPYC_K(i + 1, 0, 0,
+ AXPYC_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0] - alpha_i * x[i * COMPSIZE + 1],
alpha_i * x[i * COMPSIZE + 0] + alpha_r * x[i * COMPSIZE + 1],
y, 1, a, 1, NULL, 0);
@@ -172,7 +172,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
if ((y[i * COMPSIZE + 0] != ZERO) || (y[i * COMPSIZE + 1] != ZERO)) {
#ifndef HEMVREV
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * y[i * COMPSIZE + 0] + alpha_i * y[i * COMPSIZE + 1],
alpha_i * y[i * COMPSIZE + 0] - alpha_r * y[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
@@ -184,7 +184,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#endif
#else
#ifndef LOWER
- AXPYC_K(i + 1, 0, 0,
+ AXPYC_K(i + 1, 0, 0,
alpha_r * y[i * COMPSIZE + 0] + alpha_i * y[i * COMPSIZE + 1],
- alpha_i * y[i * COMPSIZE + 0] + alpha_r * y[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
@@ -202,14 +202,14 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
a[ 1] = ZERO;
#endif
#endif
-
+
#ifndef LOWER
a += (i + 1) * COMPSIZE;
#else
a += (args -> m - i) * COMPSIZE;
#endif
}
-
+
return 0;
}
@@ -236,7 +236,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -244,16 +244,16 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
-
+
args.a = (void *)x;
args.b = (void *)y;
args.c = (void *)a;
-
+
args.lda = incx;
args.ldb = incy;
#ifndef COMPLEX
@@ -264,16 +264,16 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
dnum = (double)m * (double)m / (double)nthreads;
num_cpu = 0;
-
+
#ifndef LOWER
range_m[MAX_CPU_NUMBER] = m;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -283,13 +283,13 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = syr_kernel;
queue[num_cpu].args = &args;
@@ -298,20 +298,20 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -321,13 +321,13 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = syr_kernel;
queue[num_cpu].args = &args;
@@ -336,21 +336,21 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
return 0;
}
diff --git a/driver/level2/spr_k.c b/driver/level2/spr_k.c
index 996d9257e..84fb4e8fa 100644
--- a/driver/level2/spr_k.c
+++ b/driver/level2/spr_k.c
@@ -38,7 +38,7 @@
#include "common.h"
-int CNAME(BLASLONG m, FLOAT alpha_r,
+int CNAME(BLASLONG m, FLOAT alpha_r,
FLOAT *x, BLASLONG incx, FLOAT *a, FLOAT *buffer){
BLASLONG i;
diff --git a/driver/level2/spr_thread.c b/driver/level2/spr_thread.c
index f88950698..4a194cbd6 100644
--- a/driver/level2/spr_thread.c
+++ b/driver/level2/spr_thread.c
@@ -96,7 +96,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#else
if ((x[i * COMPSIZE + 0] != ZERO) || (x[i * COMPSIZE + 1] != ZERO)) {
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0] - alpha_i * x[i * COMPSIZE + 1],
alpha_i * x[i * COMPSIZE + 0] + alpha_r * x[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
@@ -112,7 +112,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
if ((x[i * COMPSIZE + 0] != ZERO) || (x[i * COMPSIZE + 1] != ZERO)) {
#ifndef HEMVREV
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0], - alpha_r * x[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
#else
@@ -122,7 +122,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#endif
#else
#ifndef LOWER
- AXPYC_K(i + 1, 0, 0,
+ AXPYC_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0], alpha_r * x[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
#else
@@ -145,7 +145,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
a += (args -> m - i) * COMPSIZE;
#endif
}
-
+
return 0;
}
@@ -172,7 +172,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, FLOAT *bu
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -180,15 +180,15 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, FLOAT *bu
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
-
+
args.a = (void *)x;
args.b = (void *)a;
-
+
args.lda = incx;
#if !defined(COMPLEX) || defined(HEMV) || defined(HEMVREV)
@@ -199,16 +199,16 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, FLOAT *bu
dnum = (double)m * (double)m / (double)nthreads;
num_cpu = 0;
-
+
#ifndef LOWER
range_m[MAX_CPU_NUMBER] = m;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -218,13 +218,13 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, FLOAT *bu
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = syr_kernel;
queue[num_cpu].args = &args;
@@ -233,20 +233,20 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, FLOAT *bu
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -256,13 +256,13 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, FLOAT *bu
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = syr_kernel;
queue[num_cpu].args = &args;
@@ -271,21 +271,21 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, FLOAT *bu
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
return 0;
}
diff --git a/driver/level2/symv_thread.c b/driver/level2/symv_thread.c
index cf0e2d0c0..95d6c9bb5 100644
--- a/driver/level2/symv_thread.c
+++ b/driver/level2/symv_thread.c
@@ -78,11 +78,11 @@ static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifndef LOWER
- SCAL_K(m_to, 0, 0, ZERO,
+ SCAL_K(m_to, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y, 1, NULL, 0, NULL, 0);
+ y, 1, NULL, 0, NULL, 0);
MYSYMV_U (m_to, m_to - m_from, ONE,
#ifdef COMPLEX
@@ -92,11 +92,11 @@ static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#else
- SCAL_K(args -> m - m_from, 0, 0, ZERO,
+ SCAL_K(args -> m - m_from, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
+ y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
MYSYMV_L (args -> m - m_from, m_to - m_from, ONE,
#ifdef COMPLEX
@@ -132,7 +132,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -140,45 +140,45 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
-
+
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)buffer;
-
+
args.lda = lda;
args.ldb = incx;
args.ldc = incy;
dnum = (double)m * (double)m / (double)nthreads;
num_cpu = 0;
-
+
#ifndef LOWER
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)i;
width = ((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask;
if (width < 4) width = 4;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
-
+
queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode;
queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel;
queue[MAX_CPU_NUMBER - num_cpu - 1].args = &args;
@@ -187,29 +187,29 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
queue[MAX_CPU_NUMBER - num_cpu - 1].sa = NULL;
queue[MAX_CPU_NUMBER - num_cpu - 1].sb = NULL;
queue[MAX_CPU_NUMBER - num_cpu - 1].next = &queue[MAX_CPU_NUMBER - num_cpu];
-
+
num_cpu ++;
i += width;
}
-
+
if (num_cpu) {
queue[MAX_CPU_NUMBER - num_cpu].sa = NULL;
queue[MAX_CPU_NUMBER - num_cpu].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE;
-
+
queue[MAX_CPU_NUMBER - 1].next = NULL;
-
+
exec_blas(num_cpu, &queue[MAX_CPU_NUMBER - num_cpu]);
}
-
+
#else
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -219,14 +219,14 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
if (width < 4) width = 4;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = symv_kernel;
queue[num_cpu].args = &args;
@@ -235,32 +235,32 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
#endif
#ifndef LOWER
for (i = 0; i < num_cpu - 1; i ++) {
-
+
AXPYU_K(range_m[i + 1], 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + range_n[i] * COMPSIZE, 1, buffer + range_n[num_cpu - 1] * COMPSIZE, 1, NULL, 0);
- }
+ }
AXPYU_K(m, 0, 0,
#ifndef COMPLEX
@@ -271,12 +271,12 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
buffer + range_n[num_cpu - 1] * COMPSIZE, 1, y, incy, NULL, 0);
#else
-
+
for (i = 1; i < num_cpu; i ++) {
AXPYU_K(m - range_m[i], 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + (range_n[i] + range_m[i]) * COMPSIZE, 1, buffer + range_m[i] * COMPSIZE, 1, NULL, 0);
}
@@ -288,8 +288,8 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
alpha[0], alpha[1],
#endif
buffer, 1, y, incy, NULL, 0);
-
+
#endif
-
+
return 0;
}
diff --git a/driver/level2/syr2_k.c b/driver/level2/syr2_k.c
index bca8b3bca..5bbd47bdf 100644
--- a/driver/level2/syr2_k.c
+++ b/driver/level2/syr2_k.c
@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
-int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT *x, BLASLONG incx,
+int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT *x, BLASLONG incx,
FLOAT *y, BLASLONG incy, FLOAT *a, BLASLONG lda, FLOAT *buffer){
BLASLONG i;
diff --git a/driver/level2/syr2_thread.c b/driver/level2/syr2_thread.c
index 130a62d3e..4c3294493 100644
--- a/driver/level2/syr2_thread.c
+++ b/driver/level2/syr2_thread.c
@@ -112,7 +112,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#else
if ((x[i * COMPSIZE + 0] != ZERO) || (x[i * COMPSIZE + 1] != ZERO)) {
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0] - alpha_i * x[i * COMPSIZE + 1],
alpha_i * x[i * COMPSIZE + 0] + alpha_r * x[i * COMPSIZE + 1],
y, 1, a, 1, NULL, 0);
@@ -125,7 +125,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
}
if ((y[i * COMPSIZE + 0] != ZERO) || (y[i * COMPSIZE + 1] != ZERO)) {
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * y[i * COMPSIZE + 0] - alpha_i * y[i * COMPSIZE + 1],
alpha_i * y[i * COMPSIZE + 0] + alpha_r * y[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
@@ -141,7 +141,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
if ((x[i * COMPSIZE + 0] != ZERO) || (x[i * COMPSIZE + 1] != ZERO)) {
#ifndef HERREV
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0] - alpha_i * x[i * COMPSIZE + 1],
- alpha_i * x[i * COMPSIZE + 0] - alpha_r * x[i * COMPSIZE + 1],
y, 1, a, 1, NULL, 0);
@@ -153,7 +153,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#endif
#else
#ifndef LOWER
- AXPYC_K(i + 1, 0, 0,
+ AXPYC_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0] - alpha_i * x[i * COMPSIZE + 1],
alpha_i * x[i * COMPSIZE + 0] + alpha_r * x[i * COMPSIZE + 1],
y, 1, a, 1, NULL, 0);
@@ -168,7 +168,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
if ((y[i * COMPSIZE + 0] != ZERO) || (y[i * COMPSIZE + 1] != ZERO)) {
#ifndef HERREV
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * y[i * COMPSIZE + 0] + alpha_i * y[i * COMPSIZE + 1],
alpha_i * y[i * COMPSIZE + 0] - alpha_r * y[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
@@ -180,7 +180,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#endif
#else
#ifndef LOWER
- AXPYC_K(i + 1, 0, 0,
+ AXPYC_K(i + 1, 0, 0,
alpha_r * y[i * COMPSIZE + 0] + alpha_i * y[i * COMPSIZE + 1],
- alpha_i * y[i * COMPSIZE + 0] + alpha_r * y[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
@@ -197,7 +197,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
a += lda * COMPSIZE;
}
-
+
return 0;
}
@@ -224,7 +224,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -232,16 +232,16 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
-
+
args.a = (void *)x;
args.b = (void *)y;
args.c = (void *)a;
-
+
args.lda = incx;
args.ldb = incy;
args.ldc = lda;
@@ -253,16 +253,16 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
dnum = (double)m * (double)m / (double)nthreads;
num_cpu = 0;
-
+
#ifndef LOWER
range_m[MAX_CPU_NUMBER] = m;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -272,13 +272,13 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = syr_kernel;
queue[num_cpu].args = &args;
@@ -287,20 +287,20 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -310,13 +310,13 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = syr_kernel;
queue[num_cpu].args = &args;
@@ -325,21 +325,21 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
return 0;
}
diff --git a/driver/level2/syr_k.c b/driver/level2/syr_k.c
index a0d9a2fa0..4f18cc6d0 100644
--- a/driver/level2/syr_k.c
+++ b/driver/level2/syr_k.c
@@ -38,7 +38,7 @@
#include "common.h"
-int CNAME(BLASLONG m, FLOAT alpha_r,
+int CNAME(BLASLONG m, FLOAT alpha_r,
FLOAT *x, BLASLONG incx, FLOAT *a, BLASLONG lda, FLOAT *buffer){
BLASLONG i;
diff --git a/driver/level2/syr_thread.c b/driver/level2/syr_thread.c
index 250e8c006..0eb54289f 100644
--- a/driver/level2/syr_thread.c
+++ b/driver/level2/syr_thread.c
@@ -95,7 +95,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#else
if ((x[i * COMPSIZE + 0] != ZERO) || (x[i * COMPSIZE + 1] != ZERO)) {
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0] - alpha_i * x[i * COMPSIZE + 1],
alpha_i * x[i * COMPSIZE + 0] + alpha_r * x[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
@@ -111,7 +111,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
if ((x[i * COMPSIZE + 0] != ZERO) || (x[i * COMPSIZE + 1] != ZERO)) {
#ifndef HERREV
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0], -alpha_r * x[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
#else
@@ -121,7 +121,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
#endif
#else
#ifndef LOWER
- AXPYC_K(i + 1, 0, 0,
+ AXPYC_K(i + 1, 0, 0,
alpha_r * x[i * COMPSIZE + 0], alpha_r * x[i * COMPSIZE + 1],
x, 1, a, 1, NULL, 0);
#else
@@ -137,7 +137,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
a += lda * COMPSIZE;
}
-
+
return 0;
}
@@ -164,7 +164,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, BLASLONG
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -172,15 +172,15 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, BLASLONG
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
-
+
args.a = (void *)x;
args.b = (void *)a;
-
+
args.lda = incx;
args.ldb = lda;
#if !defined(COMPLEX) || defined(HER) || defined(HERREV)
@@ -191,16 +191,16 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, BLASLONG
dnum = (double)m * (double)m / (double)nthreads;
num_cpu = 0;
-
+
#ifndef LOWER
range_m[MAX_CPU_NUMBER] = m;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -210,13 +210,13 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, BLASLONG
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = syr_kernel;
queue[num_cpu].args = &args;
@@ -225,20 +225,20 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, BLASLONG
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -248,13 +248,13 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, BLASLONG
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = syr_kernel;
queue[num_cpu].args = &args;
@@ -263,21 +263,21 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *x, BLASLONG incx, FLOAT *a, BLASLONG
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
return 0;
}
diff --git a/driver/level2/tbmv_L.c b/driver/level2/tbmv_L.c
index 05e7cf869..b41b4141e 100644
--- a/driver/level2/tbmv_L.c
+++ b/driver/level2/tbmv_L.c
@@ -54,7 +54,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(n, b, incb, buffer, 1);
}
-
+
a += (n - 1) * lda;
for (i = n - 1; i >= 0; i--) {
@@ -65,7 +65,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
if (length > 0) {
AXPYU_K(length, 0, 0,
- B[i],
+ B[i],
a + 1, 1, B + i + 1, 1, NULL, 0);
}
#endif
@@ -77,7 +77,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
B[i] *= a[k];
#endif
#endif
-
+
#ifdef TRANSA
length = i;
if (length > k) length = k;
@@ -89,7 +89,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
a -= lda;
}
-
+
if (incb != 1) {
COPY_K(n, buffer, 1, b, incb);
}
diff --git a/driver/level2/tbmv_U.c b/driver/level2/tbmv_U.c
index 49d28dcf5..50c10326b 100644
--- a/driver/level2/tbmv_U.c
+++ b/driver/level2/tbmv_U.c
@@ -56,14 +56,14 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
}
for (i = 0; i < n; i++) {
-
+
#ifndef TRANSA
length = i;
if (length > k) length = k;
if (length > 0) {
AXPYU_K(length, 0, 0,
- B[i],
+ B[i],
a + k - length, 1, B + i - length, 1, NULL, 0);
}
#endif
@@ -75,7 +75,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
B[i] *= a[0];
#endif
#endif
-
+
#ifdef TRANSA
length = n - i - 1;
if (length > k) length = k;
diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c
index e3d058826..3c1249448 100644
--- a/driver/level2/tbmv_thread.c
+++ b/driver/level2/tbmv_thread.c
@@ -105,18 +105,18 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
if (incx != 1) {
COPY_K(args -> n, x, incx, buffer, 1);
-
+
x = buffer;
buffer += ((args -> n * COMPSIZE + 1023) & ~1023);
- }
+ }
if (range_n) y += *range_n * COMPSIZE;
- SCAL_K(args -> n, 0, 0, ZERO,
+ SCAL_K(args -> n, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y, 1, NULL, 0, NULL, 0);
+ y, 1, NULL, 0, NULL, 0);
for (i = n_from; i < n_to; i++) {
@@ -148,7 +148,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#endif
}
#endif
-
+
#ifndef COMPLEX
#ifdef UNIT
*(y + i * COMPSIZE) += *(x + i * COMPSIZE);
@@ -183,19 +183,19 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#endif
#endif
#endif
-
+
#ifdef LOWER
if (length > 0) {
#ifndef TRANS
MYAXPY(length, 0, 0,
- *(x + i * COMPSIZE + 0),
+ *(x + i * COMPSIZE + 0),
#ifdef COMPLEX
*(x + i * COMPSIZE + 1),
#endif
a + COMPSIZE, 1, y + (i + 1) * COMPSIZE, 1, NULL, 0);
#else
result = MYDOT(length, a + COMPSIZE, 1, x + (i + 1) * COMPSIZE, 1);
-
+
#ifndef COMPLEX
*(y + i * COMPSIZE + 0) += result;
#else
@@ -205,10 +205,10 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#endif
}
#endif
-
+
a += lda * COMPSIZE;
}
-
+
return 0;
}
@@ -236,7 +236,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -244,51 +244,51 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.n = n;
args.k = k;
-
+
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)(buffer);
-
+
args.lda = lda;
args.ldb = incx;
-
+
dnum = (double)n * (double)n / (double)nthreads;
num_cpu = 0;
-
+
if (n < 2 * k) {
#ifndef LOWER
-
+
range_m[MAX_CPU_NUMBER] = n;
i = 0;
-
+
while (i < n){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(n - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
} else {
width = n - i;
}
-
+
if (width < 16) width = 16;
if (width > n - i) width = n - i;
-
+
} else {
width = n - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
queue[num_cpu].args = &args;
@@ -297,37 +297,37 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
-
+
range_m[0] = 0;
i = 0;
-
+
while (i < n){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(n - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
} else {
width = n - i;
}
-
+
if (width < 16) width = 16;
if (width > n - i) width = n - i;
-
+
} else {
width = n - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
queue[num_cpu].args = &args;
@@ -336,27 +336,27 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
} else {
-
+
range_m[0] = 0;
i = n;
-
+
while (i > 0){
-
+
width = blas_quickdivide(i + nthreads - num_cpu - 1, nthreads - num_cpu);
-
+
if (width < 4) width = 4;
if (i < width) width = i;
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
queue[num_cpu].args = &args;
@@ -365,7 +365,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i -= width;
}
@@ -376,20 +376,20 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer + num_cpu * (((n + 255) & ~255) + 16) * COMPSIZE;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
for (i = 1; i < num_cpu; i ++) {
AXPYU_K(n, 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + range_n[i] * COMPSIZE, 1, buffer, 1, NULL, 0);
}
-
+
COPY_K(n, buffer, 1, x, incx);
return 0;
diff --git a/driver/level2/tbsv_L.c b/driver/level2/tbsv_L.c
index e9c9158e4..0d036440d 100644
--- a/driver/level2/tbsv_L.c
+++ b/driver/level2/tbsv_L.c
@@ -56,7 +56,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
}
for (i = 0; i < n; i++) {
-
+
#ifdef TRANSA
length = i;
if (length > k) length = k;
@@ -73,14 +73,14 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
B[i] /= a[0];
#endif
#endif
-
+
#ifndef TRANSA
length = n - i - 1;
if (length > k) length = k;
if (length > 0) {
AXPYU_K(length, 0, 0,
- -B[i],
+ -B[i],
a + 1, 1, B + i + 1, 1, NULL, 0);
}
#endif
diff --git a/driver/level2/tbsv_U.c b/driver/level2/tbsv_U.c
index 0b1fca8f0..1dc1a99e7 100644
--- a/driver/level2/tbsv_U.c
+++ b/driver/level2/tbsv_U.c
@@ -54,7 +54,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(n, b, incb, buffer, 1);
}
-
+
a += (n - 1) * lda;
for (i = n - 1; i >= 0; i--) {
@@ -75,21 +75,21 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
B[i] /= a[k];
#endif
#endif
-
+
#ifndef TRANSA
length = i;
if (length > k) length = k;
if (length > 0) {
AXPYU_K(length, 0, 0,
- - B[i],
+ - B[i],
a + k - length, 1, B + i - length, 1, NULL, 0);
}
#endif
a -= lda;
}
-
+
if (incb != 1) {
COPY_K(n, buffer, 1, b, incb);
}
diff --git a/driver/level2/tpmv_L.c b/driver/level2/tpmv_L.c
index c139eb79d..d01478c66 100644
--- a/driver/level2/tpmv_L.c
+++ b/driver/level2/tpmv_L.c
@@ -51,14 +51,14 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
B = buffer;
COPY_K(m, b, incb, buffer, 1);
}
-
+
a += (m + 1) * m / 2 - 1;
for (i = 0; i < m; i++) {
#ifndef TRANSA
if (i > 0) AXPYU_K(i, 0, 0, B[m - i - 1], a + 1, 1, B + m - i, 1, NULL, 0);
#endif
-
+
#ifndef UNIT
B[m - i - 1] *= a[0];
#endif
@@ -73,7 +73,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
a -= (m - i);
#endif
}
-
+
if (incb != 1) {
COPY_K(m, buffer, 1, b, incb);
}
diff --git a/driver/level2/tpmv_U.c b/driver/level2/tpmv_U.c
index 6d69df6f0..5d311f8bd 100644
--- a/driver/level2/tpmv_U.c
+++ b/driver/level2/tpmv_U.c
@@ -53,11 +53,11 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
}
for (i = 0; i < m; i++) {
-
+
#ifndef TRANSA
if (i > 0) AXPYU_K(i, 0, 0, B[i], a, 1, B, 1, NULL, 0);
#endif
-
+
#ifndef UNIT
#ifndef TRANSA
B[i] *= a[i];
@@ -65,7 +65,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
B[i] *= a[0];
#endif
#endif
-
+
#ifdef TRANSA
if (i < m - 1) B[i] += DOTU_K(m - i - 1, a + 1, 1, B + i + 1, 1);
#endif
diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c
index 64b725f86..3b91cee45 100644
--- a/driver/level2/tpmv_thread.c
+++ b/driver/level2/tpmv_thread.c
@@ -110,35 +110,35 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#else
COPY_K(args -> m - m_from, x + m_from * incx * COMPSIZE, incx, buffer + m_from * COMPSIZE, 1);
#endif
-
+
x = buffer;
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
- }
+ }
#ifndef TRANS
if (range_n) y += *range_n * COMPSIZE;
#ifndef LOWER
- SCAL_K(m_to, 0, 0, ZERO,
+ SCAL_K(m_to, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y, 1, NULL, 0, NULL, 0);
+ y, 1, NULL, 0, NULL, 0);
#else
- SCAL_K(args -> m - m_from, 0, 0, ZERO,
+ SCAL_K(args -> m - m_from, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
+ y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
#endif
#else
- SCAL_K(m_to - m_from, 0, 0, ZERO,
+ SCAL_K(m_to - m_from, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
+ y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
#endif
@@ -154,9 +154,9 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
if (i > 0) {
#ifndef TRANS
MYAXPY(i, 0, 0,
- *(x + i * COMPSIZE + 0),
+ *(x + i * COMPSIZE + 0),
#ifdef COMPLEX
- *(x + i * COMPSIZE + 1),
+ *(x + i * COMPSIZE + 1),
#endif
a, 1, y, 1, NULL, 0);
#else
@@ -202,7 +202,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifdef LOWER
if (args -> m > i + 1) {
#ifndef TRANS
- MYAXPY(args -> m - i - 1, 0, 0,
+ MYAXPY(args -> m - i - 1, 0, 0,
*(x + i * COMPSIZE + 0),
#ifdef COMPLEX
*(x + i * COMPSIZE + 1),
@@ -258,7 +258,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -266,31 +266,31 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
-
+
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)(buffer);
-
+
args.ldb = incx;
args.ldc = incx;
-
+
dnum = (double)m * (double)m / (double)nthreads;
num_cpu = 0;
-
+
#ifndef LOWER
range_m[MAX_CPU_NUMBER] = m;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -300,14 +300,14 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = tpmv_kernel;
queue[num_cpu].args = &args;
@@ -316,20 +316,20 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -339,14 +339,14 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = tpmv_kernel;
queue[num_cpu].args = &args;
@@ -355,46 +355,46 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
#ifndef TRANS
for (i = 1; i < num_cpu; i ++) {
-
+
#ifndef LOWER
-
+
AXPYU_K(range_m[MAX_CPU_NUMBER - i], 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + range_n[i] * COMPSIZE, 1, buffer, 1, NULL, 0);
-
+
#else
-
+
AXPYU_K(m - range_m[i], 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + (range_n[i] + range_m[i]) * COMPSIZE, 1, buffer + range_m[i] * COMPSIZE, 1, NULL, 0);
-
+
#endif
}
#endif
-
+
COPY_K(m, buffer, 1, x, incx);
return 0;
diff --git a/driver/level2/tpsv_L.c b/driver/level2/tpsv_L.c
index 9f76181e1..3fafa9054 100644
--- a/driver/level2/tpsv_L.c
+++ b/driver/level2/tpsv_L.c
@@ -41,7 +41,7 @@
#include "common.h"
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
-
+
BLASLONG i;
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *B = b;
@@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
#ifdef TRANSA
if (i > 0) B[i] -= DOTU_K(i, a, 1, B, 1);
#endif
-
+
#ifndef UNIT
#ifndef TRANSA
B[i] /= a[0];
@@ -64,7 +64,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
B[i] /= a[i];
#endif
#endif
-
+
#ifndef TRANSA
if (i < m - 1) {
AXPYU_K(m - i - 1 , 0, 0, - B[i],
@@ -78,7 +78,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
a += (i + 1);
#endif
}
-
+
if (incb != 1) {
COPY_K(m, buffer, 1, b, incb);
}
diff --git a/driver/level2/tpsv_U.c b/driver/level2/tpsv_U.c
index 7a0958021..fb5ef02b2 100644
--- a/driver/level2/tpsv_U.c
+++ b/driver/level2/tpsv_U.c
@@ -51,18 +51,18 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(m, b, incb, buffer, 1);
}
-
+
a += (m + 1) * m / 2 - 1;
for (i = 0; i < m; i++) {
#ifdef TRANSA
if (i > 0) B[m - i - 1] -= DOTU_K(i, a + 1, 1, B + m - i, 1);
#endif
-
+
#ifndef UNIT
B[m - i - 1] /= a[0];
#endif
-
+
#ifndef TRANSA
if (i < m - 1) AXPYU_K(m - i - 1, 0, 0, -B[m - i - 1], a - (m - i - 1), 1, B, 1, NULL, 0);
#endif
diff --git a/driver/level2/trmv_L.c b/driver/level2/trmv_L.c
index e515ba60b..0de48a697 100644
--- a/driver/level2/trmv_L.c
+++ b/driver/level2/trmv_L.c
@@ -53,14 +53,14 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(m, b, incb, buffer, 1);
}
-
+
for (is = m; is > 0; is -= DTB_ENTRIES){
min_i = MIN(is, DTB_ENTRIES);
-
+
#ifndef TRANSA
if (m - is > 0){
- GEMV_N(m - is, min_i, 0, dp1,
+ GEMV_N(m - is, min_i, 0, dp1,
a + is + (is - min_i) * lda, lda,
B + is - min_i, 1,
B + is, 1, gemvbuffer);
@@ -83,10 +83,10 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
if (i < min_i - 1) BB[0] += DOTU_K(min_i - i - 1, AA - (min_i - i - 1), 1, BB - (min_i - i - 1), 1);
#endif
}
-
+
#ifdef TRANSA
if (is - min_i > 0){
- GEMV_T(is - min_i, min_i, 0, dp1,
+ GEMV_T(is - min_i, min_i, 0, dp1,
a + (is - min_i) * lda, lda,
B, 1,
B + is - min_i, 1, gemvbuffer);
diff --git a/driver/level2/trmv_U.c b/driver/level2/trmv_U.c
index 3c36f77d9..a0aa7ef0e 100644
--- a/driver/level2/trmv_U.c
+++ b/driver/level2/trmv_U.c
@@ -55,12 +55,12 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
}
for (is = 0; is < m; is += DTB_ENTRIES){
-
+
min_i = MIN(m - is, DTB_ENTRIES);
#ifndef TRANSA
if (is > 0){
- GEMV_N(is, min_i, 0, dp1,
+ GEMV_N(is, min_i, 0, dp1,
a + is * lda, lda,
B + is, 1,
B, 1, gemvbuffer);
@@ -70,7 +70,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
for (i = 0; i < min_i; i++) {
FLOAT *AA = a + is + (i + is) * lda;
FLOAT *BB = B + is;
-
+
#ifndef TRANSA
if (i > 0) AXPYU_K(i, 0, 0, BB[i], AA, 1, BB, 1, NULL, 0);
#endif
@@ -86,7 +86,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
#ifdef TRANSA
if (m - is > min_i){
- GEMV_T(m - is - min_i, min_i, 0, dp1,
+ GEMV_T(m - is - min_i, min_i, 0, dp1,
a + is + min_i + is * lda, lda,
B + is + min_i, 1,
B + is, 1, gemvbuffer);
diff --git a/driver/level2/trmv_thread.c b/driver/level2/trmv_thread.c
index 4f5b27c69..29e9799f6 100644
--- a/driver/level2/trmv_thread.c
+++ b/driver/level2/trmv_thread.c
@@ -117,40 +117,40 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#else
COPY_K(args -> m - m_from, x + m_from * incx * COMPSIZE, incx, buffer + m_from * COMPSIZE, 1);
#endif
-
+
x = buffer;
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
- }
+ }
#ifndef TRANS
if (range_n) y += *range_n * COMPSIZE;
#ifndef LOWER
- SCAL_K(m_to, 0, 0, ZERO,
+ SCAL_K(m_to, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y, 1, NULL, 0, NULL, 0);
+ y, 1, NULL, 0, NULL, 0);
#else
- SCAL_K(args -> m - m_from, 0, 0, ZERO,
+ SCAL_K(args -> m - m_from, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
+ y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
#endif
#else
- SCAL_K(m_to - m_from, 0, 0, ZERO,
+ SCAL_K(m_to - m_from, 0, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
- y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
+ y + m_from * COMPSIZE, 1, NULL, 0, NULL, 0);
#endif
for (is = m_from; is < m_to; is += DTB_ENTRIES){
-
+
min_i = MIN(m_to - is, DTB_ENTRIES);
#ifndef LOWER
@@ -178,13 +178,13 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
if (i - is > 0) {
#ifndef TRANS
MYAXPY(i - is, 0, 0,
- *(x + i * COMPSIZE + 0),
+ *(x + i * COMPSIZE + 0),
#ifdef COMPLEX
- *(x + i * COMPSIZE + 1),
+ *(x + i * COMPSIZE + 1),
#endif
a + (is + i * lda) * COMPSIZE, 1, y + is * COMPSIZE, 1, NULL, 0);
#else
-
+
result = MYDOT(i - is, a + (is + i * lda) * COMPSIZE, 1, x + is * COMPSIZE, 1);
#ifndef COMPLEX
@@ -227,7 +227,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifdef LOWER
if (is + min_i > i + 1) {
#ifndef TRANS
- MYAXPY(is + min_i - i - 1, 0, 0,
+ MYAXPY(is + min_i - i - 1, 0, 0,
*(x + i * COMPSIZE + 0),
#ifdef COMPLEX
*(x + i * COMPSIZE + 1),
@@ -248,7 +248,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
}
#endif
}
-
+
#ifdef LOWER
if (args -> m > is + min_i){
MYGEMV(args -> m - is - min_i, min_i, 0,
@@ -259,9 +259,9 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
a + (is + min_i + is * lda) * COMPSIZE, lda,
#ifndef TRANS
x + is * COMPSIZE, 1,
- y + (is + min_i) * COMPSIZE, 1,
+ y + (is + min_i) * COMPSIZE, 1,
#else
- x + (is + min_i) * COMPSIZE, 1,
+ x + (is + min_i) * COMPSIZE, 1,
y + is * COMPSIZE, 1,
#endif
buffer);
@@ -296,7 +296,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
int mode = BLAS_DOUBLE | BLAS_REAL;
#else
int mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -304,32 +304,32 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
int mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
+#endif
#endif
#endif
args.m = m;
-
+
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)(buffer);
-
+
args.lda = lda;
args.ldb = incx;
args.ldc = incx;
-
+
dnum = (double)m * (double)m / (double)nthreads;
num_cpu = 0;
-
+
#ifndef LOWER
range_m[MAX_CPU_NUMBER] = m;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -339,14 +339,14 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
queue[num_cpu].args = &args;
@@ -355,20 +355,20 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#else
range_m[0] = 0;
i = 0;
-
+
while (i < m){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)(m - i);
if (di * di - dnum > 0) {
width = ((BLASLONG)(-sqrt(di * di - dnum) + di) + mask) & ~mask;
@@ -378,14 +378,14 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
if (width < 16) width = 16;
if (width > m - i) width = m - i;
-
+
} else {
width = m - i;
}
-
+
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
queue[num_cpu].args = &args;
@@ -394,46 +394,46 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
#endif
if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE;
-
+
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
#ifndef TRANS
for (i = 1; i < num_cpu; i ++) {
-
+
#ifndef LOWER
-
+
AXPYU_K(range_m[MAX_CPU_NUMBER - i], 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + range_n[i] * COMPSIZE, 1, buffer, 1, NULL, 0);
-
+
#else
-
+
AXPYU_K(m - range_m[i], 0, 0, ONE,
#ifdef COMPLEX
- ZERO,
+ ZERO,
#endif
buffer + (range_n[i] + range_m[i]) * COMPSIZE, 1, buffer + range_m[i] * COMPSIZE, 1, NULL, 0);
-
+
#endif
}
#endif
-
+
COPY_K(m, buffer, 1, x, incx);
return 0;
diff --git a/driver/level2/trsv_L.c b/driver/level2/trsv_L.c
index 44bcfe398..95ec57213 100644
--- a/driver/level2/trsv_L.c
+++ b/driver/level2/trsv_L.c
@@ -46,7 +46,7 @@ const static FLOAT dm1 = -1.;
#define GEMV_UNROLL DTB_ENTRIES
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
-
+
BLASLONG i, is, min_i;
FLOAT *gemvbuffer = (FLOAT *)buffer;
FLOAT *B = b;
@@ -58,14 +58,14 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
}
for (is = 0; is < m; is += GEMV_UNROLL){
-
+
min_i = MIN(m - is, GEMV_UNROLL);
#ifdef TRANSA
if (is > 0){
- GEMV_T(is, min_i, 0, dm1,
+ GEMV_T(is, min_i, 0, dm1,
a + is * lda , lda,
- B, 1,
+ B, 1,
B + is, 1, gemvbuffer);
}
#endif
@@ -89,12 +89,12 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
}
#endif
}
-
+
#ifndef TRANSA
if (m - is > min_i){
- GEMV_N(m - is - min_i, min_i, 0, dm1,
+ GEMV_N(m - is - min_i, min_i, 0, dm1,
a + is + min_i + is * lda, lda,
- B + is, 1,
+ B + is, 1,
B + (is + min_i), 1, gemvbuffer);
}
#endif
diff --git a/driver/level2/trsv_U.c b/driver/level2/trsv_U.c
index f02512bbb..823ca2e43 100644
--- a/driver/level2/trsv_U.c
+++ b/driver/level2/trsv_U.c
@@ -53,20 +53,20 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(m, b, incb, buffer, 1);
}
-
+
for (is = m; is > 0; is -= DTB_ENTRIES){
min_i = MIN(is, DTB_ENTRIES);
#ifdef TRANSA
if (m - is > 0){
- GEMV_T(m - is, min_i, 0, dm1,
+ GEMV_T(m - is, min_i, 0, dm1,
a + is + (is - min_i) * lda, lda,
B + is, 1,
B + is - min_i, 1, gemvbuffer);
}
#endif
-
+
for (i = 0; i < min_i; i++) {
FLOAT *AA = a + (is - i - 1) + (is - i - 1) * lda;
FLOAT *BB = B + (is - i - 1);
@@ -86,13 +86,13 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
#ifndef TRANSA
if (is - min_i > 0){
- GEMV_N(is - min_i, min_i, 0, dm1,
+ GEMV_N(is - min_i, min_i, 0, dm1,
a + (is - min_i) * lda, lda,
B + is - min_i, 1,
B, 1, gemvbuffer);
}
#endif
-
+
}
if (incb != 1) {
diff --git a/driver/level2/zgbmv_k.c b/driver/level2/zgbmv_k.c
index 7832a7ea5..68d6045bd 100644
--- a/driver/level2/zgbmv_k.c
+++ b/driver/level2/zgbmv_k.c
@@ -129,7 +129,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
Y[i * 2 + 1] += alpha_i * CREAL(temp) - alpha_r * CIMAG(temp);
#endif
#endif
-
+
offset_u --;
offset_l --;
diff --git a/driver/level2/zhbmv_k.c b/driver/level2/zhbmv_k.c
index 8771942d0..70e92e050 100644
--- a/driver/level2/zhbmv_k.c
+++ b/driver/level2/zhbmv_k.c
@@ -81,8 +81,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
if (length > 0) {
AXPYU_K(length, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
}
@@ -106,8 +106,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
if (length > 0) {
AXPYU_K(length, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a + COMPSIZE, 1, Y + (i + 1) * COMPSIZE, 1, NULL, 0);
}
@@ -131,8 +131,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
if (length > 0) {
AXPYC_K(length, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
}
@@ -156,8 +156,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
if (length > 0) {
AXPYC_K(length, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a + COMPSIZE, 1, Y + (i + 1) * COMPSIZE, 1, NULL, 0);
}
@@ -176,7 +176,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
#endif
#endif
-
+
a += lda * 2;
}
diff --git a/driver/level2/zher2_k.c b/driver/level2/zher2_k.c
index 3e924582f..94a8b7c61 100644
--- a/driver/level2/zher2_k.c
+++ b/driver/level2/zher2_k.c
@@ -41,7 +41,7 @@
#include "common.h"
int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
- FLOAT *x, BLASLONG incx,
+ FLOAT *x, BLASLONG incx,
FLOAT *y, BLASLONG incy, FLOAT *a, BLASLONG lda, FLOAT *buffer){
BLASLONG i;
@@ -65,7 +65,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
for (i = 0; i < m; i++){
#ifndef HEMVREV
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_i * X[i * 2 + 0] - alpha_r * X[i * 2 + 1],
Y, 1, a, 1, NULL, 0);
@@ -89,7 +89,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#endif
#else
#ifndef LOWER
- AXPYC_K(i + 1, 0, 0,
+ AXPYC_K(i + 1, 0, 0,
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
Y, 1, a, 1, NULL, 0);
diff --git a/driver/level2/zhpmv_k.c b/driver/level2/zhpmv_k.c
index 5f95ce7bd..96bceaaf2 100644
--- a/driver/level2/zhpmv_k.c
+++ b/driver/level2/zhpmv_k.c
@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
-int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
+int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, void *buffer){
BLASLONG i;
@@ -70,7 +70,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#ifndef LOWER
if (i > 0) {
FLOAT _Complex result = DOTC_K(i, a, 1, X, 1);
-
+
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
}
@@ -83,18 +83,18 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
if (i > 0) {
AXPYU_K(i, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a, 1, Y, 1, NULL, 0);
}
a += (i + 1) * 2;
-
+
#else
if (m - i > 1) {
FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
-
+
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
}
@@ -107,8 +107,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
if (m - i > 1) {
AXPYU_K(m - i - 1, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a + (i + 1) * 2, 1, Y + (i + 1) * 2, 1, NULL, 0);
}
@@ -119,7 +119,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#ifndef LOWER
if (i > 0) {
FLOAT _Complex result = DOTU_K(i, a, 1, X, 1);
-
+
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
}
@@ -132,18 +132,18 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
if (i > 0) {
AXPYC_K(i, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a, 1, Y, 1, NULL, 0);
}
a += (i + 1) * 2;
-
+
#else
if (m - i > 1) {
FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1);
-
+
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
}
@@ -156,8 +156,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
if (m - i > 1) {
AXPYC_K(m - i - 1, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a + (i + 1) * 2, 1, Y + (i + 1) * 2, 1, NULL, 0);
}
@@ -167,7 +167,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#endif
}
-
+
if (incy != 1) {
COPY_K(m, Y, 1, y, incy);
}
diff --git a/driver/level2/zhpr2_k.c b/driver/level2/zhpr2_k.c
index f4608ff9d..cb7113f60 100644
--- a/driver/level2/zhpr2_k.c
+++ b/driver/level2/zhpr2_k.c
@@ -41,7 +41,7 @@
#include "common.h"
int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
- FLOAT *x, BLASLONG incx,
+ FLOAT *x, BLASLONG incx,
FLOAT *y, BLASLONG incy, FLOAT *a, FLOAT *buffer){
BLASLONG i;
@@ -63,7 +63,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
for (i = 0; i < m; i++){
#ifndef HEMVREV
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_i * X[i * 2 + 0] - alpha_r * X[i * 2 + 1],
Y, 1, a, 1, NULL, 0);
@@ -87,7 +87,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
#endif
#else
#ifndef LOWER
- AXPYC_K(i + 1, 0, 0,
+ AXPYC_K(i + 1, 0, 0,
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
Y, 1, a, 1, NULL, 0);
diff --git a/driver/level2/zsbmv_k.c b/driver/level2/zsbmv_k.c
index de5dfdde2..30e2f91c3 100644
--- a/driver/level2/zsbmv_k.c
+++ b/driver/level2/zsbmv_k.c
@@ -78,8 +78,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
length = k - offset;
AXPYU_K(length + 1, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0);
if (length > 0) {
@@ -95,18 +95,18 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
if (n - i - 1 < k) length = n - i - 1;
AXPYU_K(length + 1, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a, 1, Y + i * COMPSIZE, 1, NULL, 0);
if (length > 0) {
FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1);
-
+
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
}
#endif
-
+
a += lda * 2;
}
diff --git a/driver/level2/zspmv_k.c b/driver/level2/zspmv_k.c
index c93b1e17e..76657eab9 100644
--- a/driver/level2/zspmv_k.c
+++ b/driver/level2/zspmv_k.c
@@ -69,29 +69,29 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
if (i > 0) {
result = DOTU_K(i, a, 1, X, 1);
-
+
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
}
AXPYU_K(i + 1, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a, 1, Y, 1, NULL, 0);
a += (i + 1) * 2;
-
+
#else
result = DOTU_K(m - i, a + i * 2, 1, X + i * 2, 1);
-
+
Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
-
+
if (m - i > 1)
AXPYU_K(m - i - 1, 0, 0,
- alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
- alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
+ alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
+ alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
a + (i + 1) * 2, 1, Y + (i + 1) * 2, 1, NULL, 0);
a += (m - i - 1) * 2;
diff --git a/driver/level2/zspr2_k.c b/driver/level2/zspr2_k.c
index 48c81a366..e41a8de3c 100644
--- a/driver/level2/zspr2_k.c
+++ b/driver/level2/zspr2_k.c
@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
-int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG incx,
+int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG incx,
FLOAT *y, BLASLONG incy, FLOAT *a, FLOAT *buffer){
BLASLONG i;
@@ -61,7 +61,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG incx,
for (i = 0; i < m; i++){
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
Y, 1, a, 1, NULL, 0);
diff --git a/driver/level2/zspr_k.c b/driver/level2/zspr_k.c
index a187bdbfa..d888a81ee 100644
--- a/driver/level2/zspr_k.c
+++ b/driver/level2/zspr_k.c
@@ -54,7 +54,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
for (i = 0; i < m; i++){
#ifndef LOWER
if ((X[i * 2 + 0] != ZERO) && (X[i * 2 + 1] != ZERO)) {
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
X, 1, a, 1, NULL, 0);
diff --git a/driver/level2/zsyr2_k.c b/driver/level2/zsyr2_k.c
index f7bbbb2f2..03daf923d 100644
--- a/driver/level2/zsyr2_k.c
+++ b/driver/level2/zsyr2_k.c
@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
-int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG incx,
+int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG incx,
FLOAT *y, BLASLONG incy, FLOAT *a, BLASLONG lda, FLOAT *buffer){
BLASLONG i;
@@ -63,7 +63,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG incx,
for (i = 0; i < m; i++){
#ifndef LOWER
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
Y, 1, a, 1, NULL, 0);
diff --git a/driver/level2/zsyr_k.c b/driver/level2/zsyr_k.c
index 9d800d37d..57d1769c3 100644
--- a/driver/level2/zsyr_k.c
+++ b/driver/level2/zsyr_k.c
@@ -55,7 +55,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
for (i = 0; i < m; i++){
#ifndef LOWER
if ((X[i * 2 + 0] != ZERO) || (X[i * 2 + 1] != ZERO)) {
- AXPYU_K(i + 1, 0, 0,
+ AXPYU_K(i + 1, 0, 0,
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
X, 1, a, 1, NULL, 0);
diff --git a/driver/level2/ztbmv_L.c b/driver/level2/ztbmv_L.c
index 9b604c04f..74ff0bce1 100644
--- a/driver/level2/ztbmv_L.c
+++ b/driver/level2/ztbmv_L.c
@@ -60,7 +60,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE+ 4095) & ~4095);
COPY_K(n, b, incb, buffer, 1);
}
-
+
a += (n - 1) * lda * COMPSIZE;
for (i = n - 1; i >= 0; i--) {
@@ -102,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
B[i * 2 + 1] = atemp1 * btemp2 - atemp2 * btemp1;
#endif
#endif
-
+
#if (TRANSA == 2) || (TRANSA == 4)
length = i;
if (length > k) length = k;
@@ -121,7 +121,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
a -= lda * COMPSIZE;
}
-
+
if (incb != 1) {
COPY_K(n, buffer, 1, b, incb);
}
diff --git a/driver/level2/ztbmv_U.c b/driver/level2/ztbmv_U.c
index 4e86f4fb1..933275de3 100644
--- a/driver/level2/ztbmv_U.c
+++ b/driver/level2/ztbmv_U.c
@@ -62,7 +62,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
}
for (i = 0; i < n; i++) {
-
+
#if (TRANSA == 1) || (TRANSA == 3)
length = i;
if (length > k) length = k;
diff --git a/driver/level2/ztbsv_L.c b/driver/level2/ztbsv_L.c
index f32ddff24..0726bbd16 100644
--- a/driver/level2/ztbsv_L.c
+++ b/driver/level2/ztbsv_L.c
@@ -62,7 +62,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
}
for (i = 0; i < n; i++) {
-
+
#if (TRANSA == 2) || (TRANSA == 4)
length = i;
if (length > k) length = k;
@@ -87,11 +87,11 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
ar = a[k * 2 + 0];
ai = a[k * 2 + 1];
#endif
-
+
if (fabs(ar) >= fabs(ai)){
ratio = ai / ar;
den = 1./(ar * ( 1 + ratio * ratio));
-
+
ar = den;
#if TRANSA < 3
ai = -ratio * den;
@@ -108,10 +108,10 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
ai = den;
#endif
}
-
+
br = B[i * 2 + 0];
bi = B[i * 2 + 1];
-
+
B[i * 2 + 0] = ar*br - ai*bi;
B[i * 2 + 1] = ar*bi + ai*br;
#endif
diff --git a/driver/level2/ztbsv_U.c b/driver/level2/ztbsv_U.c
index 252f3bace..d022650bc 100644
--- a/driver/level2/ztbsv_U.c
+++ b/driver/level2/ztbsv_U.c
@@ -60,7 +60,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE+ 4095) & ~4095);
COPY_K(n, b, incb, buffer, 1);
}
-
+
a += (n - 1) * lda * COMPSIZE;
for (i = n - 1; i >= 0; i--) {
@@ -89,11 +89,11 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
ar = a[0];
ai = a[1];
#endif
-
+
if (fabs(ar) >= fabs(ai)){
ratio = ai / ar;
den = 1./(ar * ( 1 + ratio * ratio));
-
+
ar = den;
#if TRANSA < 3
ai = -ratio * den;
@@ -110,10 +110,10 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
ai = den;
#endif
}
-
+
br = B[i * 2 + 0];
bi = B[i * 2 + 1];
-
+
B[i * 2 + 0] = ar*br - ai*bi;
B[i * 2 + 1] = ar*bi + ai*br;
#endif
@@ -138,7 +138,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
a -= lda * COMPSIZE;
}
-
+
if (incb != 1) {
COPY_K(n, buffer, 1, b, incb);
}
diff --git a/driver/level2/ztpmv_L.c b/driver/level2/ztpmv_L.c
index 62b9dc6ce..12c254c12 100644
--- a/driver/level2/ztpmv_L.c
+++ b/driver/level2/ztpmv_L.c
@@ -61,14 +61,14 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
a += (m + 1) * m - 2;
for (i = 0; i < m; i++) {
-
+
#if (TRANSA == 1) || (TRANSA == 3)
#if TRANSA == 1
- if (i > 0) AXPYU_K (i, 0, 0,
+ if (i > 0) AXPYU_K (i, 0, 0,
B[(m - i - 1) * 2 + 0], B[(m - i - 1) * 2 + 1],
a + 2, 1, B + (m - i) * 2, 1, NULL, 0);
#else
- if (i > 0) AXPYC_K(i, 0, 0,
+ if (i > 0) AXPYC_K(i, 0, 0,
B[(m - i - 1) * 2 + 0], B[(m - i - 1) * 2 + 1],
a + 2, 1, B + (m - i) * 2, 1, NULL, 0);
#endif
@@ -110,7 +110,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
#endif
}
-
+
if (incb != 1) {
COPY_K(m, buffer, 1, b, incb);
diff --git a/driver/level2/ztpmv_U.c b/driver/level2/ztpmv_U.c
index 2ff3bfb56..59708b8b8 100644
--- a/driver/level2/ztpmv_U.c
+++ b/driver/level2/ztpmv_U.c
@@ -41,7 +41,7 @@
#include "common.h"
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
-
+
BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
@@ -114,7 +114,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
a += (m - i) * 2;
#endif
}
-
+
if (incb != 1) {
COPY_K(m, buffer, 1, b, incb);
}
diff --git a/driver/level2/ztpsv_L.c b/driver/level2/ztpsv_L.c
index e9317fbdd..3b8e562ce 100644
--- a/driver/level2/ztpsv_L.c
+++ b/driver/level2/ztpsv_L.c
@@ -43,7 +43,7 @@
const static FLOAT dm1 = -1.;
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
-
+
BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
@@ -61,7 +61,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
}
for (i = 0; i < m; i++) {
-
+
#if (TRANSA == 2) || (TRANSA == 4)
if (i > 0) {
#if TRANSA == 2
@@ -69,7 +69,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
#else
result = DOTC_K(i, a, 1, B, 1);
#endif
-
+
B[i * COMPSIZE + 0] -= CREAL(result);
B[i * COMPSIZE + 1] -= CIMAG(result);
}
@@ -83,11 +83,11 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
ar = a[i * COMPSIZE + 0];
ai = a[i * COMPSIZE + 1];
#endif
-
+
if (fabs(ar) >= fabs(ai)){
ratio = ai / ar;
den = 1./(ar * ( 1 + ratio * ratio));
-
+
ar = den;
#if TRANSA < 3
ai = -ratio * den;
@@ -107,7 +107,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
br = B[i * COMPSIZE + 0];
bi = B[i * COMPSIZE + 1];
-
+
B[i * COMPSIZE + 0] = ar*br - ai*bi;
B[i * COMPSIZE + 1] = ar*bi + ai*br;
#endif
diff --git a/driver/level2/ztpsv_U.c b/driver/level2/ztpsv_U.c
index 54903dc30..601ac2f9d 100644
--- a/driver/level2/ztpsv_U.c
+++ b/driver/level2/ztpsv_U.c
@@ -61,7 +61,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
a += (m + 1) * m - 2;
for (i = 0; i < m; i++) {
-
+
#if (TRANSA == 2) || (TRANSA == 4)
if (i > 0) {
#if TRANSA == 2
@@ -69,20 +69,20 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
#else
result = DOTC_K(i, a + 2, 1, B + (m - i) * 2, 1);
#endif
-
+
B[(m - i - 1) * 2 + 0] -= CREAL(result);
B[(m - i - 1) * 2 + 1] -= CIMAG(result);
}
#endif
-
+
#ifndef UNIT
ar = a[0];
ai = a[1];
-
+
if (fabs(ar) >= fabs(ai)){
ratio = ai / ar;
den = 1./(ar * ( 1 + ratio * ratio));
-
+
ar = den;
#if (TRANSA == 1) || (TRANSA == 2)
ai = -ratio * den;
@@ -99,10 +99,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
ai = den;
#endif
}
-
+
br = B[(m - i - 1) * 2 + 0];
bi = B[(m - i - 1) * 2 + 1];
-
+
B[(m - i - 1) * 2 + 0] = ar*br - ai*bi;
B[(m - i - 1) * 2 + 1] = ar*bi + ai*br;
#endif
diff --git a/driver/level2/ztrmv_L.c b/driver/level2/ztrmv_L.c
index 3688f588e..63522cf81 100644
--- a/driver/level2/ztrmv_L.c
+++ b/driver/level2/ztrmv_L.c
@@ -122,7 +122,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
#endif
}
-
+
#if (TRANSA == 2) || (TRANSA == 4)
if (is - min_i > 0){
#if TRANSA == 2
diff --git a/driver/level2/ztrmv_U.c b/driver/level2/ztrmv_U.c
index a9fb6d1d0..8a4494fd7 100644
--- a/driver/level2/ztrmv_U.c
+++ b/driver/level2/ztrmv_U.c
@@ -43,7 +43,7 @@
static FLOAT dp1 = 1.;
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *buffer){
-
+
BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex temp;
@@ -61,7 +61,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
}
for (is =0; is < m; is += DTB_ENTRIES){
-
+
min_i = MIN(m - is, DTB_ENTRIES);
#if (TRANSA) == 1 || (TRANSA == 3)
@@ -128,7 +128,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
#endif
}
-
+
#if (TRANSA) == 2 || (TRANSA == 4)
if (m - is > min_i){
#if TRANSA == 2
diff --git a/driver/level2/ztrsv_L.c b/driver/level2/ztrsv_L.c
index f825c61f5..90f1c2c7d 100644
--- a/driver/level2/ztrsv_L.c
+++ b/driver/level2/ztrsv_L.c
@@ -43,7 +43,7 @@
const static FLOAT dm1 = -1.;
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
-
+
BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
FLOAT _Complex result;
@@ -100,11 +100,11 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
#ifndef UNIT
ar = AA[i * COMPSIZE + 0];
ai = AA[i * COMPSIZE + 1];
-
+
if (fabs(ar) >= fabs(ai)){
ratio = ai / ar;
den = 1./(ar * ( 1 + ratio * ratio));
-
+
ar = den;
#if TRANSA < 3
ai = -ratio * den;
@@ -124,7 +124,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
br = BB[i * COMPSIZE + 0];
bi = BB[i * COMPSIZE + 1];
-
+
BB[i * COMPSIZE + 0] = ar*br - ai*bi;
BB[i * COMPSIZE + 1] = ar*bi + ai*br;
#endif
diff --git a/driver/level2/ztrsv_U.c b/driver/level2/ztrsv_U.c
index 3b750a29f..bec8114f3 100644
--- a/driver/level2/ztrsv_U.c
+++ b/driver/level2/ztrsv_U.c
@@ -100,11 +100,11 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
#ifndef UNIT
ar = AA[0];
ai = AA[1];
-
+
if (fabs(ar) >= fabs(ai)){
ratio = ai / ar;
den = 1./(ar * ( 1 + ratio * ratio));
-
+
ar = den;
#if TRANSA < 3
ai = -ratio * den;
@@ -124,7 +124,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf
br = BB[0];
bi = BB[1];
-
+
BB[0] = ar*br - ai*bi;
BB[1] = ar*bi + ai*br;
#endif
diff --git a/driver/level3/Makefile b/driver/level3/Makefile
index 7d7d72339..4c004ee80 100644
--- a/driver/level3/Makefile
+++ b/driver/level3/Makefile
@@ -1094,7 +1094,7 @@ ssymm_LU.$(SUFFIX) : symm_k.c level3.c ../../param.h
ssymm_LL.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
ssymm_RU.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1106,7 +1106,7 @@ dsymm_LU.$(SUFFIX) : symm_k.c level3.c ../../param.h
dsymm_LL.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
dsymm_RU.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1118,7 +1118,7 @@ qsymm_LU.$(SUFFIX) : symm_k.c level3.c ../../param.h
qsymm_LL.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
qsymm_RU.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1130,7 +1130,7 @@ csymm_LU.$(SUFFIX) : symm_k.c level3.c ../../param.h
csymm_LL.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
csymm_RU.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1142,7 +1142,7 @@ zsymm_LU.$(SUFFIX) : symm_k.c level3.c ../../param.h
zsymm_LL.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zsymm_RU.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1154,7 +1154,7 @@ xsymm_LU.$(SUFFIX) : symm_k.c level3.c ../../param.h
xsymm_LL.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xsymm_RU.$(SUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1166,7 +1166,7 @@ ssymm_thread_LU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
ssymm_thread_LL.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
ssymm_thread_RU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1178,7 +1178,7 @@ dsymm_thread_LU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
dsymm_thread_LL.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
dsymm_thread_RU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1190,7 +1190,7 @@ qsymm_thread_LU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
qsymm_thread_LL.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
qsymm_thread_RU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1202,7 +1202,7 @@ csymm_thread_LU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
csymm_thread_LL.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
csymm_thread_RU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1214,7 +1214,7 @@ zsymm_thread_LU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
zsymm_thread_LL.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zsymm_thread_RU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1226,7 +1226,7 @@ xsymm_thread_LU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
xsymm_thread_LL.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xsymm_thread_RU.$(SUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -1529,7 +1529,7 @@ chemm_LU.$(SUFFIX) : zhemm_k.c ../../param.h
chemm_LL.$(SUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
chemm_RU.$(SUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -1541,7 +1541,7 @@ zhemm_LU.$(SUFFIX) : zhemm_k.c ../../param.h
zhemm_LL.$(SUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zhemm_RU.$(SUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -1553,7 +1553,7 @@ xhemm_LU.$(SUFFIX) : zhemm_k.c ../../param.h
xhemm_LL.$(SUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xhemm_RU.$(SUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -1565,7 +1565,7 @@ chemm_thread_LU.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
chemm_thread_LL.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
chemm_thread_RU.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -1577,7 +1577,7 @@ zhemm_thread_LU.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
zhemm_thread_LL.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zhemm_thread_RU.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -1589,7 +1589,7 @@ xhemm_thread_LU.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
xhemm_thread_LL.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xhemm_thread_RU.$(SUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -1776,76 +1776,76 @@ xher2k_kernel_LN.$(SUFFIX) : zher2k_kernel.c
xher2k_kernel_LC.$(SUFFIX) : zher2k_kernel.c
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -DCONJ $< -o $(@F)
-cgemm3m_nn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_nn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $(@F)
-cgemm3m_nt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_nt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DNT $< -o $(@F)
-cgemm3m_nr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_nr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DNR $< -o $(@F)
-cgemm3m_nc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_nc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $(@F)
-cgemm3m_tn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_tn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DTN $< -o $(@F)
-cgemm3m_tt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_tt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DTT $< -o $(@F)
-cgemm3m_tr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_tr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DTR $< -o $(@F)
-cgemm3m_tc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_tc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DTC $< -o $(@F)
-cgemm3m_rn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_rn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DRN $< -o $(@F)
-cgemm3m_rt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_rt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DRT $< -o $(@F)
-cgemm3m_rr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_rr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DRR $< -o $(@F)
-cgemm3m_rc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_rc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DRC $< -o $(@F)
-cgemm3m_cn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_cn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCN $< -o $(@F)
-cgemm3m_ct.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_ct.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F)
-cgemm3m_cr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_cr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F)
-cgemm3m_cc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_cc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F)
-zgemm3m_nn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_nn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $(@F)
-zgemm3m_nt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_nt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DNT $< -o $(@F)
-zgemm3m_nr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_nr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DNR $< -o $(@F)
-zgemm3m_nc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_nc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $(@F)
-zgemm3m_tn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_tn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DTN $< -o $(@F)
-zgemm3m_tt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_tt.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DTT $< -o $(@F)
-zgemm3m_tr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_tr.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DTR $< -o $(@F)
-zgemm3m_tc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_tc.$(SUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DTC $< -o $(@F)
zgemm3m_rn.$(SUFFIX) : gemm3m.c gemm3m_level3.c
@@ -2078,7 +2078,7 @@ csymm3m_LU.$(SUFFIX) : symm3m_k.c ../../param.h
csymm3m_LL.$(SUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
csymm3m_RU.$(SUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2090,7 +2090,7 @@ zsymm3m_LU.$(SUFFIX) : symm3m_k.c ../../param.h
zsymm3m_LL.$(SUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zsymm3m_RU.$(SUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2102,7 +2102,7 @@ xsymm3m_LU.$(SUFFIX) : symm3m_k.c ../../param.h
xsymm3m_LL.$(SUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xsymm3m_RU.$(SUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2114,7 +2114,7 @@ csymm3m_thread_LU.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
csymm3m_thread_LL.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
csymm3m_thread_RU.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2126,7 +2126,7 @@ zsymm3m_thread_LU.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
zsymm3m_thread_LL.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zsymm3m_thread_RU.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2138,7 +2138,7 @@ xsymm3m_thread_LU.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
xsymm3m_thread_LL.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xsymm3m_thread_RU.$(SUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2150,7 +2150,7 @@ chemm3m_LU.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
chemm3m_LL.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
chemm3m_RU.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2162,7 +2162,7 @@ zhemm3m_LU.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
zhemm3m_LL.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zhemm3m_RU.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2174,7 +2174,7 @@ xhemm3m_LU.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
xhemm3m_LL.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xhemm3m_RU.$(SUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2186,7 +2186,7 @@ chemm3m_thread_LU.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
chemm3m_thread_LL.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
chemm3m_thread_RU.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2198,7 +2198,7 @@ zhemm3m_thread_LU.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
zhemm3m_thread_LL.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zhemm3m_thread_RU.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -2210,7 +2210,7 @@ xhemm3m_thread_LU.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
xhemm3m_thread_LL.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xhemm3m_thread_RU.$(SUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(CFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3463,7 +3463,7 @@ ssymm_LU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
ssymm_LL.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
ssymm_RU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3475,7 +3475,7 @@ dsymm_LU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
dsymm_LL.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
dsymm_RU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3487,7 +3487,7 @@ qsymm_LU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
qsymm_LL.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
qsymm_RU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3499,7 +3499,7 @@ csymm_LU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
csymm_LL.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
csymm_RU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3511,7 +3511,7 @@ zsymm_LU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
zsymm_LL.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zsymm_RU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3523,7 +3523,7 @@ xsymm_LU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
xsymm_LL.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xsymm_RU.$(PSUFFIX) : symm_k.c level3.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3535,7 +3535,7 @@ ssymm_thread_LU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
ssymm_thread_LL.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
ssymm_thread_RU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3547,7 +3547,7 @@ dsymm_thread_LU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
dsymm_thread_LL.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
dsymm_thread_RU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3559,7 +3559,7 @@ qsymm_thread_LU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
qsymm_thread_LL.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -UCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
qsymm_thread_RU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -UCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3571,7 +3571,7 @@ csymm_thread_LU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
csymm_thread_LL.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
csymm_thread_RU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3583,7 +3583,7 @@ zsymm_thread_LU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
zsymm_thread_LL.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zsymm_thread_RU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3595,7 +3595,7 @@ xsymm_thread_LU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
xsymm_thread_LL.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xsymm_thread_RU.$(PSUFFIX) : symm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -3898,7 +3898,7 @@ chemm_LU.$(PSUFFIX) : zhemm_k.c ../../param.h
chemm_LL.$(PSUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
chemm_RU.$(PSUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -3910,7 +3910,7 @@ zhemm_LU.$(PSUFFIX) : zhemm_k.c ../../param.h
zhemm_LL.$(PSUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zhemm_RU.$(PSUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -3922,7 +3922,7 @@ xhemm_LU.$(PSUFFIX) : zhemm_k.c ../../param.h
xhemm_LL.$(PSUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xhemm_RU.$(PSUFFIX) : zhemm_k.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -3934,7 +3934,7 @@ chemm_thread_LU.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
chemm_thread_LL.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
chemm_thread_RU.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -3946,7 +3946,7 @@ zhemm_thread_LU.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
zhemm_thread_LL.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zhemm_thread_RU.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -3958,7 +3958,7 @@ xhemm_thread_LU.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
xhemm_thread_LL.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xhemm_thread_RU.$(PSUFFIX) : zhemm_k.c level3_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNC $< -o $(@F)
@@ -4145,76 +4145,76 @@ xher2k_kernel_LN.$(PSUFFIX) : zher2k_kernel.c
xher2k_kernel_LC.$(PSUFFIX) : zher2k_kernel.c
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -DCONJ $< -o $(@F)
-cgemm3m_nn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_nn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $(@F)
-cgemm3m_nt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_nt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DNT $< -o $(@F)
-cgemm3m_nr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_nr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DNR $< -o $(@F)
-cgemm3m_nc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_nc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DNC $< -o $(@F)
-cgemm3m_tn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_tn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DTN $< -o $(@F)
-cgemm3m_tt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_tt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DTT $< -o $(@F)
-cgemm3m_tr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_tr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DTR $< -o $(@F)
-cgemm3m_tc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_tc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DTC $< -o $(@F)
-cgemm3m_rn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_rn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DRN $< -o $(@F)
-cgemm3m_rt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_rt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DRT $< -o $(@F)
-cgemm3m_rr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_rr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DRR $< -o $(@F)
-cgemm3m_rc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_rc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DRC $< -o $(@F)
-cgemm3m_cn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_cn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCN $< -o $(@F)
-cgemm3m_ct.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_ct.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F)
-cgemm3m_cr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_cr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F)
-cgemm3m_cc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+cgemm3m_cc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F)
-zgemm3m_nn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_nn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DNN $< -o $(@F)
-zgemm3m_nt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_nt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DNT $< -o $(@F)
-zgemm3m_nr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_nr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DNR $< -o $(@F)
-zgemm3m_nc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_nc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DNC $< -o $(@F)
-zgemm3m_tn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_tn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DTN $< -o $(@F)
-zgemm3m_tt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_tt.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DTT $< -o $(@F)
-zgemm3m_tr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_tr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DTR $< -o $(@F)
-zgemm3m_tc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
+zgemm3m_tc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DTC $< -o $(@F)
zgemm3m_rn.$(PSUFFIX) : gemm3m.c gemm3m_level3.c
@@ -4447,7 +4447,7 @@ csymm3m_LU.$(PSUFFIX) : symm3m_k.c ../../param.h
csymm3m_LL.$(PSUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
csymm3m_RU.$(PSUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4459,7 +4459,7 @@ zsymm3m_LU.$(PSUFFIX) : symm3m_k.c ../../param.h
zsymm3m_LL.$(PSUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zsymm3m_RU.$(PSUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4471,7 +4471,7 @@ xsymm3m_LU.$(PSUFFIX) : symm3m_k.c ../../param.h
xsymm3m_LL.$(PSUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xsymm3m_RU.$(PSUFFIX) : symm3m_k.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4483,7 +4483,7 @@ csymm3m_thread_LU.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
csymm3m_thread_LL.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
csymm3m_thread_RU.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4495,7 +4495,7 @@ zsymm3m_thread_LU.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
zsymm3m_thread_LL.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zsymm3m_thread_RU.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4507,7 +4507,7 @@ xsymm3m_thread_LU.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
xsymm3m_thread_LL.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xsymm3m_thread_RU.$(PSUFFIX) : symm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4519,7 +4519,7 @@ chemm3m_LU.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
chemm3m_LL.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
chemm3m_RU.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(PFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4531,7 +4531,7 @@ zhemm3m_LU.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
zhemm3m_LL.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zhemm3m_RU.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(PFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4543,7 +4543,7 @@ xhemm3m_LU.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
xhemm3m_LL.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xhemm3m_RU.$(PSUFFIX) : hemm3m_k.c gemm3m_level3.c ../../param.h
$(CC) -c $(PFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4555,7 +4555,7 @@ chemm3m_thread_LU.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
chemm3m_thread_LL.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
chemm3m_thread_RU.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -UDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4567,7 +4567,7 @@ zhemm3m_thread_LU.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
zhemm3m_thread_LL.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
zhemm3m_thread_RU.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
@@ -4579,7 +4579,7 @@ xhemm3m_thread_LU.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
xhemm3m_thread_LL.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -DCOMPLEX -DLOWER -URSIDE -DNN $< -o $(@F)
-
+
xhemm3m_thread_RU.$(PSUFFIX) : hemm3m_k.c level3_gemm3m_thread.c ../../param.h
$(CC) -c -DTHREADED_LEVEL3 $(PFLAGS) -DXDOUBLE -DCOMPLEX -ULOWER -DRSIDE -DNN $< -o $(@F)
diff --git a/driver/level3/gemm3m_level3.c b/driver/level3/gemm3m_level3.c
index df4d723ab..064968298 100644
--- a/driver/level3/gemm3m_level3.c
+++ b/driver/level3/gemm3m_level3.c
@@ -306,10 +306,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for(js = n_from; js < n_to; js += GEMM3M_R){
min_j = n_to - js;
if (min_j > GEMM3M_R) min_j = GEMM3M_R;
-
+
for(ls = 0; ls < k; ls += min_l){
min_l = k - ls;
-
+
if (min_l >= GEMM3M_Q * 2) {
min_l = GEMM3M_Q;
} else {
@@ -320,7 +320,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
}
}
-
+
min_i = m_to - m_from;
if (min_i >= GEMM3M_P * 2) {
min_i = GEMM3M_P;
@@ -331,53 +331,53 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
START_RPCC();
-
+
ICOPYB_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
-
+
STOP_RPCC(innercost);
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
-
+
START_RPCC();
-
+
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || defined(RN) || defined(RT) || defined(CN) || defined(CT)
OCOPYB_OPERATION(min_l, min_jj, b, ldb, alpha[0], alpha[1], ls, jjs, sb + min_l * (jjs - js));
#else
OCOPYB_OPERATION(min_l, min_jj, b, ldb, alpha[0], -alpha[1], ls, jjs, sb + min_l * (jjs - js));
#endif
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, ALPHA5, ALPHA6,
sa, sb + min_l * (jjs - js), c, ldc, m_from, jjs);
-
+
STOP_RPCC(kernelcost);
-
- }
-
+
+ }
+
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM3M_P * 2) {
min_i = GEMM3M_P;
- } else
+ } else
if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
-
+
START_RPCC();
-
+
ICOPYB_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_j, min_l, ALPHA5, ALPHA6, sa, sb, c, ldc, is, js);
-
+
STOP_RPCC(kernelcost);
}
@@ -389,19 +389,19 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
}
-
+
START_RPCC();
-
+
ICOPYR_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
-
+
STOP_RPCC(innercost);
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
-
+
START_RPCC();
-
+
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
OCOPYR_OPERATION(min_l, min_jj, b, ldb, alpha[0], alpha[1], ls, jjs, sb + min_l * (jjs - js));
#elif defined(RR) || defined(RC) || defined(CR) || defined(CC)
@@ -413,37 +413,37 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, ALPHA11, ALPHA12,
sa, sb + min_l * (jjs - js), c, ldc, m_from, jjs);
-
+
STOP_RPCC(kernelcost);
-
- }
-
+
+ }
+
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM3M_P * 2) {
min_i = GEMM3M_P;
- } else
+ } else
if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
-
+
START_RPCC();
-
+
ICOPYR_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_j, min_l, ALPHA11, ALPHA12, sa, sb, c, ldc, is, js);
-
+
STOP_RPCC(kernelcost);
-
+
}
min_i = m_to - m_from;
@@ -454,20 +454,20 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
}
-
+
START_RPCC();
-
+
ICOPYI_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
-
+
STOP_RPCC(innercost);
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
-
+
START_RPCC();
-
-#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
+
+#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
OCOPYI_OPERATION(min_l, min_jj, b, ldb, alpha[0], alpha[1], ls, jjs, sb + min_l * (jjs - js));
#elif defined(RR) || defined(RC) || defined(CR) || defined(CC)
OCOPYI_OPERATION(min_l, min_jj, b, ldb, alpha[0], -alpha[1], ls, jjs, sb + min_l * (jjs - js));
@@ -478,42 +478,42 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, ALPHA17, ALPHA18,
sa, sb + min_l * (jjs - js), c, ldc, m_from, jjs);
-
+
STOP_RPCC(kernelcost);
-
- }
-
+
+ }
+
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM3M_P * 2) {
min_i = GEMM3M_P;
- } else
+ } else
if (min_i > GEMM3M_P) {
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
-
+
START_RPCC();
-
+
ICOPYI_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_j, min_l, ALPHA17, ALPHA18, sa, sb, c, ldc, is, js);
-
+
STOP_RPCC(kernelcost);
-
+
}
} /* end of js */
} /* end of ls */
-
+
#ifdef TIMING
total = (double)outercost + (double)innercost + (double)kernelcost;
@@ -526,6 +526,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
((double)(m_to - m_from) * (double)(n_to - n_from) * (double)k) / (double)kernelcost / 2 * 100,
2400. * (2. * (double)(m_to - m_from) * (double)(n_to - n_from) * (double)k) / (double)kernelcost);
#endif
-
+
return 0;
}
diff --git a/driver/level3/gemm_thread_m.c b/driver/level3/gemm_thread_m.c
index 52c9b2d3e..8813e5529 100644
--- a/driver/level3/gemm_thread_m.c
+++ b/driver/level3/gemm_thread_m.c
@@ -58,7 +58,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
num_cpu = 0;
while (i > 0){
-
+
width = blas_quickdivide(i + nthreads - num_cpu - 1, nthreads - num_cpu);
i -= width;
@@ -76,15 +76,15 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
queue[num_cpu].next = &queue[num_cpu + 1];
num_cpu ++;
}
-
+
if (num_cpu) {
queue[0].sa = sa;
queue[0].sb = sb;
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
return 0;
}
diff --git a/driver/level3/gemm_thread_mn.c b/driver/level3/gemm_thread_mn.c
index b81c6fa40..2966eac82 100644
--- a/driver/level3/gemm_thread_mn.c
+++ b/driver/level3/gemm_thread_mn.c
@@ -40,7 +40,7 @@
#include <stdlib.h>
#include "common.h"
-static const int divide_rule[][2] =
+static const int divide_rule[][2] =
{{ 0, 0},
{ 1, 1}, { 1, 2}, { 1, 3}, { 2, 2},
{ 1, 5}, { 2, 3}, { 1, 7}, { 2, 4},
@@ -84,7 +84,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
num_cpu_m = 0;
while (i > 0){
-
+
width = blas_quickdivide(i + divM - num_cpu_m - 1, divM - num_cpu_m);
i -= width;
@@ -106,7 +106,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
num_cpu_n = 0;
while (i > 0){
-
+
width = blas_quickdivide(i + divN - num_cpu_n - 1, divN - num_cpu_n);
i -= width;
@@ -134,15 +134,15 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
procs ++;
}
}
-
+
if (procs) {
queue[0].sa = sa;
queue[0].sb = sb;
queue[procs - 1].next = NULL;
-
+
exec_blas(procs, queue);
}
-
+
return 0;
}
diff --git a/driver/level3/gemm_thread_n.c b/driver/level3/gemm_thread_n.c
index 3e11f9aba..9668841bb 100644
--- a/driver/level3/gemm_thread_n.c
+++ b/driver/level3/gemm_thread_n.c
@@ -54,11 +54,11 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
range[0] = range_n[0];
i = range_n[1] - range_n[0];
}
-
+
num_cpu = 0;
while (i > 0){
-
+
width = blas_quickdivide(i + nthreads - num_cpu - 1, nthreads - num_cpu);
i -= width;
@@ -81,7 +81,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
queue[num_cpu].next = &queue[num_cpu + 1];
num_cpu ++;
}
-
+
if (num_cpu) {
#if 0 //defined(LOONGSON3A)
queue[0].sa = sa;
@@ -91,10 +91,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
queue[0].sb = sb;
#endif
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu,
queue);
}
-
+
return 0;
}
diff --git a/driver/level3/gemm_thread_variable.c b/driver/level3/gemm_thread_variable.c
index 9ffe17040..162a75f70 100644
--- a/driver/level3/gemm_thread_variable.c
+++ b/driver/level3/gemm_thread_variable.c
@@ -62,7 +62,7 @@ int CNAME(int mode,
num_cpu_m = 0;
while (i > 0){
-
+
width = blas_quickdivide(i + divM - num_cpu_m - 1, divM - num_cpu_m);
i -= width;
@@ -84,7 +84,7 @@ int CNAME(int mode,
num_cpu_n = 0;
while (i > 0){
-
+
width = blas_quickdivide(i + divN - num_cpu_n - 1, divN - num_cpu_n);
i -= width;
@@ -112,7 +112,7 @@ int CNAME(int mode,
procs ++;
}
}
-
+
if (procs) {
queue[0].sa = sa;
queue[0].sb = sb;
@@ -121,7 +121,7 @@ int CNAME(int mode,
exec_blas(procs, queue);
}
-
+
return 0;
}
diff --git a/driver/level3/level3.c b/driver/level3/level3.c
index 5f746642c..261204099 100644
--- a/driver/level3/level3.c
+++ b/driver/level3/level3.c
@@ -241,7 +241,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
) {
#if defined(XDOUBLE) && defined(QUAD_PRECISION)
xidouble xbeta;
-
+
qtox(&xbeta, beta);
#endif
BETA_OPERATION(m_from, m_to, n_from, n_to, beta, c, ldc);
@@ -287,7 +287,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for(js = n_from; js < n_to; js += GEMM_R){
min_j = n_to - js;
if (min_j > GEMM_R) min_j = GEMM_R;
-
+
for(ls = 0; ls < k; ls += min_l){
min_l = k - ls;
@@ -302,11 +302,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1));
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M;
}
-
+
/* First, we have to move data A to L2 cache */
min_i = m_to - m_from;
l1stride = 1;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
} else {
@@ -316,13 +316,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
l1stride = 0;
}
}
-
+
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
-
+
STOP_RPCC(innercost);
-
+
#if defined(FUSED_GEMM) && !defined(TIMING)
FUSED_KERNEL_OPERATION(min_i, min_j, min_l, alpha,
@@ -344,16 +344,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
#endif
-
+
START_RPCC();
-
- OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
+
+ OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
sb + min_l * (jjs - js) * COMPSIZE * l1stride);
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
sa, sb + min_l * (jjs - js) * COMPSIZE * l1stride, c, ldc, m_from, jjs);
@@ -363,39 +363,39 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
STOP_RPCC(kernelcost);
- }
+ }
#endif
-
+
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
}
-
+
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
-
+
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
KERNEL_OPERATION(min_i, min_j, min_l, alpha, sa, sb, c, ldc, is, js);
#else
KERNEL_OPERATION(min_i, min_j, min_l, (void *)&xalpha, sa, sb, c, ldc, is, js);
#endif
-
+
STOP_RPCC(kernelcost);
} /* end of is */
} /* end of js */
} /* end of ls */
-
+
#ifdef TIMING
total = (double)outercost + (double)innercost + (double)kernelcost;
diff --git a/driver/level3/level3_gemm3m_thread.c b/driver/level3/level3_gemm3m_thread.c
index bcb0f9dd9..02bf57ee2 100644
--- a/driver/level3/level3_gemm3m_thread.c
+++ b/driver/level3/level3_gemm3m_thread.c
@@ -49,7 +49,7 @@
#endif
//The array of job_t may overflow the stack.
-//Instead, use malloc to alloc job_t.
+//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
@@ -362,12 +362,12 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM3M_Q * ((div_n + GEMM3M_UNROLL_N - 1) & ~(GEMM3M_UNROLL_N - 1));
}
-
+
for(ls = 0; ls < k; ls += min_l){
min_l = k - ls;
if (min_l >= GEMM3M_Q * 2) {
@@ -379,7 +379,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
min_i = m_to - m_from;
-
+
if (min_i >= GEMM3M_P * 2) {
min_i = GEMM3M_P;
} else {
@@ -390,73 +390,73 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
START_RPCC();
-
+
ICOPYB_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
-
+
STOP_RPCC(copy_A);
-
+
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = n_from, bufferside = 0; xxx < n_to; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
/* Make sure if no one is using another buffer */
for (i = 0; i < args -> nthreads; i++)
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
-
+
STOP_RPCC(waiting1);
-
+
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
min_jj = MIN(n_to, xxx + div_n) - jjs;
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
-
+
START_RPCC();
-
+
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || defined(RN) || defined(RT) || defined(CN) || defined(CT)
OCOPYB_OPERATION(min_l, min_jj, b, ldb, alpha[0], alpha[1], ls, jjs, buffer[bufferside] + min_l * (jjs - xxx));
#else
OCOPYB_OPERATION(min_l, min_jj, b, ldb, alpha[0], -alpha[1], ls, jjs, buffer[bufferside] + min_l * (jjs - xxx));
#endif
-
+
STOP_RPCC(copy_B);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, ALPHA5, ALPHA6,
sa, buffer[bufferside] + min_l * (jjs - xxx),
c, ldc, m_from, jjs);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
ops += 2 * min_i * min_jj * min_l;
#endif
}
-
+
for (i = 0; i < args -> nthreads; i++)
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
}
-
+
current = mypos;
-
+
do {
current ++;
if (current >= args -> nthreads) current = 0;
-
+
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
if (current != mypos) {
-
+
START_RPCC();
-
+
/* thread has to wait */
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
-
+
STOP_RPCC(waiting2);
-
+
START_RPCC();
@@ -469,42 +469,42 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
ops += 2 * min_i * MIN(range_n[current + 1] - xxx, div_n) * min_l;
#endif
}
-
+
if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
}
}
} while (current != mypos);
-
+
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM3M_P * 2) {
min_i = GEMM3M_P;
- } else
+ } else
if (min_i > GEMM3M_P) {
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
-
+
START_RPCC();
-
+
ICOPYB_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(copy_A);
-
+
current = mypos;
do {
-
+
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, ALPHA5, ALPHA6,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, ldc, is, xxx);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
ops += 2 * min_i * (range_n[current + 1] - range_n[current] - div_n) * min_l;
@@ -514,38 +514,38 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
}
}
-
+
current ++;
if (current >= args -> nthreads) current = 0;
-
+
} while (current != mypos);
-
+
} /* end of is */
-
+
START_RPCC();
-
+
ICOPYR_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
-
+
STOP_RPCC(copy_A);
-
+
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = n_from, bufferside = 0; xxx < n_to; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
/* Make sure if no one is using another buffer */
for (i = 0; i < args -> nthreads; i++)
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
-
+
STOP_RPCC(waiting1);
-
+
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
min_jj = MIN(n_to, xxx + div_n) - jjs;
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
-
+
START_RPCC();
-
+
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
OCOPYR_OPERATION(min_l, min_jj, b, ldb, alpha[0], alpha[1], ls, jjs, buffer[bufferside] + min_l * (jjs - xxx));
#elif defined(RR) || defined(RC) || defined(CR) || defined(CC)
@@ -557,43 +557,43 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
STOP_RPCC(copy_B);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, ALPHA11, ALPHA12,
sa, buffer[bufferside] + min_l * (jjs - xxx),
c, ldc, m_from, jjs);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
ops += 2 * min_i * min_jj * min_l;
#endif
}
-
+
for (i = 0; i < args -> nthreads; i++)
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
}
-
+
current = mypos;
-
+
do {
current ++;
if (current >= args -> nthreads) current = 0;
-
+
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
if (current != mypos) {
-
+
START_RPCC();
-
+
/* thread has to wait */
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
-
+
STOP_RPCC(waiting2);
-
+
START_RPCC();
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, ALPHA11, ALPHA12,
@@ -605,41 +605,41 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
ops += 2 * min_i * MIN(range_n[current + 1] - xxx, div_n) * min_l;
#endif
}
-
+
if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
}
}
} while (current != mypos);
-
+
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM3M_P * 2) {
min_i = GEMM3M_P;
- } else
+ } else
if (min_i > GEMM3M_P) {
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
-
+
START_RPCC();
-
+
ICOPYR_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(copy_A);
-
+
current = mypos;
do {
-
+
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, ALPHA11, ALPHA12,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, ldc, is, xxx);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
ops += 2 * min_i * (range_n[current + 1] - range_n[current] - div_n) * min_l;
@@ -649,40 +649,40 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
}
}
-
+
current ++;
if (current >= args -> nthreads) current = 0;
-
+
} while (current != mypos);
-
+
} /* end of is */
-
+
START_RPCC();
-
+
ICOPYI_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
-
+
STOP_RPCC(copy_A);
-
+
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = n_from, bufferside = 0; xxx < n_to; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
/* Make sure if no one is using another buffer */
for (i = 0; i < args -> nthreads; i++)
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
-
+
STOP_RPCC(waiting1);
-
+
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
min_jj = MIN(n_to, xxx + div_n) - jjs;
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
-
+
START_RPCC();
-
-#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
+
+#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
OCOPYI_OPERATION(min_l, min_jj, b, ldb, alpha[0], alpha[1], ls, jjs, buffer[bufferside] + min_l * (jjs - xxx));
#elif defined(RR) || defined(RC) || defined(CR) || defined(CC)
OCOPYI_OPERATION(min_l, min_jj, b, ldb, alpha[0], -alpha[1], ls, jjs, buffer[bufferside] + min_l * (jjs - xxx));
@@ -693,43 +693,43 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
STOP_RPCC(copy_B);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, ALPHA17, ALPHA18,
sa, buffer[bufferside] + min_l * (jjs - xxx),
c, ldc, m_from, jjs);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
ops += 2 * min_i * min_jj * min_l;
#endif
}
-
+
for (i = 0; i < args -> nthreads; i++)
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
}
-
+
current = mypos;
-
+
do {
current ++;
if (current >= args -> nthreads) current = 0;
-
+
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
if (current != mypos) {
-
+
START_RPCC();
-
+
/* thread has to wait */
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
-
+
STOP_RPCC(waiting2);
-
+
START_RPCC();
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, ALPHA17, ALPHA18,
@@ -741,41 +741,41 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
ops += 2 * min_i * MIN(range_n[current + 1] - xxx, div_n) * min_l;
#endif
}
-
+
if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
}
}
} while (current != mypos);
-
+
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM3M_P * 2) {
min_i = GEMM3M_P;
- } else
+ } else
if (min_i > GEMM3M_P) {
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1);
}
-
+
START_RPCC();
-
+
ICOPYI_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(copy_A);
-
+
current = mypos;
do {
-
+
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, ALPHA17, ALPHA18,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, ldc, is, xxx);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
ops += 2 * min_i * (range_n[current + 1] - range_n[current] - div_n) * min_l;
@@ -785,16 +785,16 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
}
}
-
+
current ++;
if (current >= args -> nthreads) current = 0;
-
+
} while (current != mypos);
-
+
} /* end of is */
}
-
+
START_RPCC();
for (i = 0; i < args -> nthreads; i++) {
@@ -862,7 +862,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
mode = BLAS_DOUBLE | BLAS_REAL | BLAS_NODE;
#else
mode = BLAS_SINGLE | BLAS_REAL | BLAS_NODE;
-#endif
+#endif
newarg.m = args -> m;
newarg.n = args -> n;
@@ -886,7 +886,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
#endif
newarg.common = (void *)job;
-
+
if (!range_m) {
range_M[0] = 0;
m = args -> m;
@@ -898,7 +898,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
num_cpu_m = 0;
while (m > 0){
-
+
width = blas_quickdivide(m + nthreads - num_cpu_m - 1, nthreads - num_cpu_m);
m -= width;
@@ -919,10 +919,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
queue[i].sb = NULL;
queue[i].next = &queue[i + 1];
}
-
+
queue[0].sa = sa;
queue[0].sb = sb;
-
+
if (!range_n) {
n_from = 0;
n_to = args -> n;
@@ -934,23 +934,23 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
for(js = n_from; js < n_to; js += GEMM_R * nthreads){
n = n_to - js;
if (n > GEMM_R * nthreads) n = GEMM_R * nthreads;
-
+
range_N[0] = js;
num_cpu_n = 0;
while (n > 0){
-
+
width = blas_quickdivide(n + nthreads - num_cpu_n - 1, nthreads - num_cpu_n);
-
+
n -= width;
if (n < 0) width = width + n;
-
+
range_N[num_cpu_n + 1] = range_N[num_cpu_n] + width;
-
+
num_cpu_n ++;
}
-
+
for (j = 0; j < num_cpu_m; j++) {
for (i = 0; i < num_cpu_m; i++) {
for (k = 0; k < DIVIDE_RATE; k++) {
@@ -958,9 +958,9 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
}
}
}
-
+
queue[num_cpu_m - 1].next = NULL;
-
+
exec_blas(num_cpu_m, queue);
}
@@ -978,7 +978,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG nthreads = args -> nthreads;
BLASLONG divN, divT;
int mode;
-
+
if (range_m) {
BLASLONG m_from = *(((BLASLONG *)range_m) + 0);
BLASLONG m_to = *(((BLASLONG *)range_m) + 1);
@@ -1020,8 +1020,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
-
+#endif
+
#if defined(TN) || defined(TT) || defined(TR) || defined(TC) || \
defined(CN) || defined(CT) || defined(CR) || defined(CC)
mode |= (BLAS_TRANSA_T);
@@ -1030,8 +1030,8 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
defined(NC) || defined(TC) || defined(RC) || defined(CC)
mode |= (BLAS_TRANSB_T);
#endif
-
- gemm_thread_n(mode, args, range_m, range_n, gemm_driver, sa, sb, divN);
+
+ gemm_thread_n(mode, args, range_m, range_n, gemm_driver, sa, sb, divN);
}
return 0;
diff --git a/driver/level3/level3_syr2k.c b/driver/level3/level3_syr2k.c
index 2db18578b..a75d379d7 100644
--- a/driver/level3/level3_syr2k.c
+++ b/driver/level3/level3_syr2k.c
@@ -178,16 +178,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_l = k - ls;
if (min_l >= GEMM_Q * 2) {
min_l = GEMM_Q;
- } else
+ } else
if (min_l > GEMM_Q) {
min_l = (min_l + 1) / 2;
}
min_i = m_end - m_start;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
@@ -195,44 +195,44 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#ifndef LOWER
if (m_start >= js) {
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_start, sa);
aa = sb + min_l * (m_start - js) * COMPSIZE;
-
+
OCOPY_OPERATION(min_l, min_i, b, ldb, ls, m_start, aa);
-
+
KERNEL_OPERATION(min_i, min_i, min_l, alpha, sa, aa, c, ldc, m_start, m_start, 1);
-
+
jjs = m_start + min_i;
} else {
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_start, sa);
jjs = js;
}
-
+
for(; jjs < js + min_j; jjs += GEMM_UNROLL_MN){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
-
+
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs, sb + min_l * (jjs - js) * COMPSIZE);
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
sa, sb + min_l * (jjs - js) * COMPSIZE,
c, ldc, m_start, jjs, 1);
}
-
+
for(is = m_start + min_i; is < m_end; is += min_i){
min_i = m_end - is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
KERNEL_OPERATION(min_i, min_j, min_l, alpha, sa, sb, c, ldc, is, js, 1);
@@ -243,50 +243,50 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
if (m_start >= js) {
-
+
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, m_start, sa);
aa = sb + min_l * (m_start - js) * COMPSIZE;
-
+
OCOPY_OPERATION(min_l, min_i, a, lda, ls, m_start, aa);
-
+
KERNEL_OPERATION_C(min_i, min_i, min_l, alpha, sa, aa, c, ldc, m_start, m_start, 0);
-
+
jjs = m_start + min_i;
} else {
-
+
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, m_start, sa);
jjs = js;
}
-
+
for(; jjs < js + min_j; jjs += GEMM_UNROLL_MN){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
-
+
OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs, sb + min_l * (jjs - js) * COMPSIZE);
-
+
KERNEL_OPERATION_C(min_i, min_jj, min_l, alpha,
sa, sb + min_l * (jjs - js) * COMPSIZE,
c, ldc, m_start, jjs, 0);
}
-
+
for(is = m_start + min_i; is < m_end; is += min_i){
min_i = m_end - is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
-
+
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa);
KERNEL_OPERATION_C(min_i, min_j, min_l, alpha, sa, sb, c, ldc, is, js, 0);
@@ -300,49 +300,49 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_start, sa);
OCOPY_OPERATION(min_l, min_i, b, ldb, ls, m_start, aa);
-
+
KERNEL_OPERATION(min_i, MIN(min_i, min_j + js - m_start), min_l, alpha,
sa, aa, c, ldc, m_start, m_start, 1);
for(jjs = js; jjs < m_start; jjs += GEMM_UNROLL_MN){
min_jj = m_start - jjs;
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
-
+
OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs, sb + min_l * (jjs - js) * COMPSIZE);
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
sa, sb + min_l * (jjs - js) * COMPSIZE, c, ldc, m_start, jjs, 1);
}
for(is = m_start + min_i; is < m_end; is += min_i){
-
+
min_i = m_end - is;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
-
+
aa = sb + min_l * (is - js) * COMPSIZE;
if (is < js + min_j) {
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
OCOPY_OPERATION(min_l, min_i, b, ldb, ls, is, aa);
-
+
KERNEL_OPERATION(min_i, MIN(min_i, min_j - is + js), min_l, alpha, sa, aa, c, ldc, is, is, 1);
-
+
KERNEL_OPERATION(min_i, is - js, min_l, alpha, sa, sb, c, ldc, is, js, 1);
-
+
} else {
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
KERNEL_OPERATION(min_i, min_j, min_l, alpha, sa, sb, c, ldc, is, js, 1);
-
+
}
}
@@ -351,7 +351,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
@@ -361,49 +361,49 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, m_start, sa);
OCOPY_OPERATION(min_l, min_i, a, lda, ls, m_start, aa);
-
+
KERNEL_OPERATION_C(min_i, MIN(min_i, min_j + js - m_start), min_l, alpha,
sa, aa, c, ldc, m_start, m_start, 0);
for(jjs = js; jjs < m_start; jjs += GEMM_UNROLL_MN){
min_jj = m_start - jjs;
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
-
+
OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs, sb + min_l * (jjs - js) * COMPSIZE);
-
+
KERNEL_OPERATION_C(min_i, min_jj, min_l, alpha,
sa, sb + min_l * (jjs - js) * COMPSIZE, c, ldc, m_start, jjs, 0);
}
for(is = m_start + min_i; is < m_end; is += min_i){
-
+
min_i = m_end - is;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
-
+
aa = sb + min_l * (is - js) * COMPSIZE;
if (is < js + min_j) {
-
+
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa);
-
+
OCOPY_OPERATION(min_l, min_i, a, lda, ls, is, aa);
-
+
KERNEL_OPERATION_C(min_i, MIN(min_i, min_j - is + js), min_l, alpha, sa, aa, c, ldc, is, is, 0);
-
+
KERNEL_OPERATION_C(min_i, is - js, min_l, alpha, sa, sb, c, ldc, is, js, 0);
-
+
} else {
-
+
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa);
-
+
KERNEL_OPERATION_C(min_i, min_j, min_l, alpha, sa, sb, c, ldc, is, js, 0);
-
+
}
}
diff --git a/driver/level3/level3_syrk.c b/driver/level3/level3_syrk.c
index 249c140cd..ba544a00d 100644
--- a/driver/level3/level3_syrk.c
+++ b/driver/level3/level3_syrk.c
@@ -187,16 +187,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_l = k - ls;
if (min_l >= GEMM_Q * 2) {
min_l = GEMM_Q;
- } else
+ } else
if (min_l > GEMM_Q) {
min_l = (min_l + 1) / 2;
}
min_i = m_end - m_start;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
@@ -207,29 +207,29 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
aa = sb + min_l * MAX(m_start - js, 0) * COMPSIZE;
if (!shared) aa = sa;
-
+
for(jjs = MAX(m_start, js); jjs < js + min_j; jjs += min_jj){
min_jj = js + min_j - jjs;
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
-
+
if (!shared && (jjs - MAX(m_start, js) < min_i)) {
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs, sa + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(innercost);
}
-
+
START_RPCC();
-
+
OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs, sb + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, alpha, aa, sb + min_l * (jjs - js) * COMPSIZE, c, ldc, MAX(m_start, js), jjs);
-
+
STOP_RPCC(kernelcost);
}
@@ -237,30 +237,30 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = m_end - is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
-
+
aa = sb + min_l * (is - js) * COMPSIZE;
-
+
if (!shared) {
-
+
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
aa = sa;
}
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_j, min_l, alpha, aa, sb, c, ldc, is, js);
-
+
STOP_RPCC(kernelcost);
-
+
}
}
@@ -268,27 +268,27 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (m_start < js) {
if (m_end < js) {
-
+
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_start, sa);
-
+
STOP_RPCC(innercost);
-
+
for(jjs = js; jjs < js + min_j; jjs += GEMM_UNROLL_MN){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
-
+
START_RPCC();
-
+
OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs, sb + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, alpha, sa, sb + min_l * (jjs - js) * COMPSIZE, c, ldc, m_start, jjs);
-
+
STOP_RPCC(kernelcost);
}
@@ -301,180 +301,180 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
min_i = MIN(m_end, js)- is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
-
+
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_j, min_l, alpha, sa, sb, c, ldc, is, js);
-
+
STOP_RPCC(kernelcost);
-
+
}
}
#else
if (m_start < js + min_j) {
-
+
aa = sb + min_l * (m_start - js) * COMPSIZE;
-
+
if (!shared) {
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_start, sa);
-
+
STOP_RPCC(innercost);
-
+
}
START_RPCC();
-
+
OCOPY_OPERATION(min_l, (shared? (min_i) : MIN(min_i, min_j + js - m_start)), a, lda, ls, m_start, aa);
-
+
STOP_RPCC(outercost);
START_RPCC();
-
+
KERNEL_OPERATION(min_i, MIN(min_i, min_j + js - m_start), min_l, alpha, (shared? (aa) : (sa)), aa, c, ldc, m_start, m_start);
-
+
STOP_RPCC(kernelcost);
for(jjs = js; jjs < m_start; jjs += GEMM_UNROLL_N){
min_jj = m_start - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
START_RPCC();
-
+
OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs, sb + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
KERNEL_OPERATION(min_i, min_jj, min_l, alpha, (shared? (aa) : (sa)), sb + min_l * (jjs - js) * COMPSIZE, c, ldc, m_start, jjs);
-
+
STOP_RPCC(kernelcost);
-
+
}
for(is = m_start + min_i; is < m_end; is += min_i){
-
+
min_i = m_end - is;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
-
+
if (is < js + min_j) {
-
+
if (!shared) {
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
}
aa = sb + min_l * (is - js) * COMPSIZE;
-
+
START_RPCC();
-
+
OCOPY_OPERATION(min_l, (shared? (min_i) : MIN(min_i, min_j - is + js)), a, lda, ls, is, aa);
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, MIN(min_i, min_j - is + js), min_l, alpha, (shared? (aa) : (sa)), aa, c, ldc, is, is);
-
+
STOP_RPCC(kernelcost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, is - js, min_l, alpha, (shared? (aa) : (sa)), sb, c, ldc, is, js);
-
+
STOP_RPCC(kernelcost);
-
+
} else {
-
+
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_j, min_l, alpha, sa, sb, c, ldc, is, js);
-
+
STOP_RPCC(kernelcost);
-
+
}
-
+
}
} else {
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_start, sa);
-
+
STOP_RPCC(innercost);
-
+
for(jjs = js; jjs < min_j; jjs += GEMM_UNROLL_N){
min_jj = min_j - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
START_RPCC();
-
+
OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs, sb + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, alpha, sa, sb + min_l * (jjs - js) * COMPSIZE, c, ldc, m_start, jjs);
-
+
STOP_RPCC(kernelcost);
-
+
}
-
+
for(is = m_start + min_i; is < m_end; is += min_i){
-
+
min_i = m_end - is;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
-
+
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_j, min_l, alpha, sa, sb, c, ldc, is, js);
-
+
STOP_RPCC(kernelcost);
-
+
}
}
#endif
diff --git a/driver/level3/level3_syrk_threaded.c b/driver/level3/level3_syrk_threaded.c
index 4a3f7a89f..01c7b23ed 100644
--- a/driver/level3/level3_syrk_threaded.c
+++ b/driver/level3/level3_syrk_threaded.c
@@ -49,7 +49,7 @@
#endif
//The array of job_t may overflow the stack.
-//Instead, use malloc to alloc job_t.
+//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
@@ -217,7 +217,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM_Q * div_n * COMPSIZE;
}
-
+
for(ls = 0; ls < k; ls += min_l){
min_l = k - ls;
@@ -228,7 +228,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
min_i = m_to - m_from;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
} else {
@@ -244,22 +244,22 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
START_RPCC();
-
+
#ifndef LOWER
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
#else
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_to - min_i, sa);
#endif
-
+
STOP_RPCC(copy_A);
-
+
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
-
+
for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
/* Make sure if no one is using buffer */
#ifndef LOWER
for (i = 0; i < mypos; i++)
@@ -267,9 +267,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
for (i = mypos + 1; i < args -> nthreads; i++)
#endif
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
-
+
STOP_RPCC(waiting1);
-
+
#ifndef LOWER
for(jjs = xxx; jjs < MIN(m_to, xxx + div_n); jjs += min_jj){
@@ -281,16 +281,16 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
} else {
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
}
-
+
START_RPCC();
-
- OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs,
+
+ OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs,
buffer[bufferside] + min_l * (jjs - xxx) * COMPSIZE);
-
+
STOP_RPCC(copy_B);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
sa, buffer[bufferside] + min_l * (jjs - xxx) * COMPSIZE,
c, ldc, m_from, jjs);
@@ -310,20 +310,20 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
min_jj = MIN(m_to, xxx + div_n) - jjs;
if (min_jj > GEMM_UNROLL_MN) min_jj = GEMM_UNROLL_MN;
-
+
START_RPCC();
-
- OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs,
+
+ OCOPY_OPERATION(min_l, min_jj, a, lda, ls, jjs,
buffer[bufferside] + min_l * (jjs - xxx) * COMPSIZE);
-
+
STOP_RPCC(copy_B);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
sa, buffer[bufferside] + min_l * (jjs - xxx) * COMPSIZE,
c, ldc, m_to - min_i, jjs);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
@@ -333,7 +333,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
#endif
-
+
#ifndef LOWER
for (i = 0; i <= mypos; i++)
#else
@@ -344,7 +344,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
WMB;
}
-
+
#ifndef LOWER
current = mypos + 1;
while (current < args -> nthreads) {
@@ -355,42 +355,42 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
/* thread has to wait */
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
-
+
STOP_RPCC(waiting2);
-
+
START_RPCC();
-
+
#ifndef LOWER
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, alpha,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
- c, ldc,
+ c, ldc,
m_from,
xxx);
#else
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, alpha,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
- c, ldc,
+ c, ldc,
m_to - min_i,
xxx);
#endif
-
+
STOP_RPCC(kernel);
#ifdef TIMING
ops += 2 * min_i * MIN(range_n[current + 1] - xxx, div_n) * min_l;
#endif
-
+
if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
}
}
-
+
#ifndef LOWER
current ++;
#else
@@ -410,38 +410,38 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
}
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(copy_A);
-
+
current = mypos;
do {
-
+
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1);
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, alpha,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, ldc, is, xxx);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
ops += 2 * min_i * MIN(range_n[current + 1] - xxx, div_n) * min_l;
#endif
-
+
#ifndef LOWER
if (is + min_i >= m_to) {
#else
@@ -452,7 +452,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
WMB;
}
}
-
+
#ifndef LOWER
current ++;
} while (current != args -> nthreads);
@@ -460,11 +460,11 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
current --;
} while (current >= 0);
#endif
-
-
+
+
}
}
-
+
START_RPCC();
for (i = 0; i < args -> nthreads; i++) {
@@ -528,7 +528,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
double dnum;
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) {
- SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
+ SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
return 0;
}
@@ -542,7 +542,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
mode = BLAS_SINGLE | BLAS_REAL;
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1;
-#endif
+#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -553,7 +553,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1;
-#endif
+#endif
#endif
newarg.m = args -> m;
@@ -577,7 +577,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#endif
newarg.common = (void *)job;
-
+
if (!range_n) {
n_from = 0;
n_to = args -> n;
@@ -597,17 +597,17 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
dnum = (double)n * (double)n /(double)nthreads;
while (i < n){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)i;
-
+
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
-
+
if (num_cpu == 0) width = n - ((n - width) & ~mask);
-
+
if ((width > n - i) || (width < mask)) width = n - i;
-
+
} else {
width = n - i;
}
@@ -622,7 +622,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
@@ -639,21 +639,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
dnum = (double)n * (double)n /(double)nthreads;
while (i < n){
-
+
if (nthreads - num_cpu > 1) {
-
+
double di = (double)i;
-
+
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask);
-
+
if ((width > n - i) || (width < mask)) width = n - i;
-
+
} else {
width = n - i;
}
range[num_cpu + 1] = range[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = inner_thread;
queue[num_cpu].args = &newarg;
@@ -662,7 +662,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
@@ -680,14 +680,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
}
}
}
-
+
queue[0].sa = sa;
queue[0].sb = sb;
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
#ifdef USE_ALLOC_HEAP
free(job);
#endif
diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c
index ee1a8db7c..95860d0c0 100644
--- a/driver/level3/level3_thread.c
+++ b/driver/level3/level3_thread.c
@@ -49,7 +49,7 @@
#endif
//The array of job_t may overflow the stack.
-//Instead, use malloc to alloc job_t.
+//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
#define USE_ALLOC_HEAP
#endif
@@ -309,12 +309,12 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#endif
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
buffer[0] = sb;
for (i = 1; i < DIVIDE_RATE; i++) {
buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE;
}
-
+
for(ls = 0; ls < k; ls += min_l){
@@ -328,7 +328,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
l1stride = 1;
min_i = m_to - m_from;
-
+
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
} else {
@@ -340,23 +340,23 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, m_from, sa);
-
+
STOP_RPCC(copy_A);
-
+
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = n_from, bufferside = 0; xxx < n_to; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
/* Make sure if no one is using buffer */
for (i = 0; i < args -> nthreads; i++)
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
-
+
STOP_RPCC(waiting1);
-
+
#if defined(FUSED_GEMM) && !defined(TIMING)
FUSED_KERNEL_OPERATION(min_i, MIN(n_to, xxx + div_n) - xxx, min_l, alpha,
@@ -376,21 +376,21 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#else
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-#endif
+#endif
START_RPCC();
-
- OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
+
+ OCOPY_OPERATION(min_l, min_jj, b, ldb, ls, jjs,
buffer[bufferside] + min_l * (jjs - xxx) * COMPSIZE * l1stride);
-
+
STOP_RPCC(copy_B);
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, min_jj, min_l, alpha,
sa, buffer[bufferside] + min_l * (jjs - xxx) * COMPSIZE * l1stride,
c, ldc, m_from, jjs);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
@@ -399,30 +399,30 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
#endif
-
+
for (i = 0; i < args -> nthreads; i++) job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
WMB;
}
current = mypos;
-
+
do {
current ++;
if (current >= args -> nthreads) current = 0;
-
+
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
if (current != mypos) {
-
+
START_RPCC();
-
+
/* thread has to wait */
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
-
+
STOP_RPCC(waiting2);
-
+
START_RPCC();
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, alpha,
@@ -434,43 +434,43 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
ops += 2 * min_i * MIN(range_n[current + 1] - xxx, div_n) * min_l;
#endif
}
-
+
if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
}
}
} while (current != mypos);
-
+
for(is = m_from + min_i; is < m_to; is += min_i){
min_i = m_to - is;
if (min_i >= GEMM_P * 2) {
min_i = GEMM_P;
- } else
+ } else
if (min_i > GEMM_P) {
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1);
}
-
+
START_RPCC();
-
+
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa);
-
+
STOP_RPCC(copy_A);
-
+
current = mypos;
do {
-
+
div_n = (range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE;
-
+
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) {
-
+
START_RPCC();
-
+
KERNEL_OPERATION(min_i, MIN(range_n[current + 1] - xxx, div_n), min_l, alpha,
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
c, ldc, is, xxx);
-
+
STOP_RPCC(kernel);
#ifdef TIMING
@@ -483,16 +483,16 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
WMB;
}
}
-
+
current ++;
if (current >= args -> nthreads) current = 0;
-
+
} while (current != mypos);
-
+
}
-
+
}
-
+
START_RPCC();
for (i = 0; i < args -> nthreads; i++) {
@@ -561,7 +561,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
mode = BLAS_DOUBLE | BLAS_REAL | BLAS_NODE;
#else
mode = BLAS_SINGLE | BLAS_REAL | BLAS_NODE;
-#endif
+#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX | BLAS_NODE;
@@ -569,7 +569,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
mode = BLAS_DOUBLE | BLAS_COMPLEX | BLAS_NODE;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX | BLAS_NODE;
-#endif
+#endif
#endif
newarg.m = args -> m;
@@ -594,7 +594,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
#endif
newarg.common = (void *)job;
-
+
#ifdef PARAMTEST
newarg.gemm_p = args -> gemm_p;
newarg.gemm_q = args -> gemm_q;
@@ -612,7 +612,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
num_cpu_m = 0;
while (m > 0){
-
+
width = blas_quickdivide(m + nthreads - num_cpu_m - 1, nthreads - num_cpu_m);
m -= width;
@@ -633,10 +633,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
queue[i].sb = NULL;
queue[i].next = &queue[i + 1];
}
-
+
queue[0].sa = sa;
queue[0].sb = sb;
-
+
if (!range_n) {
n_from = 0;
n_to = args -> n;
@@ -648,23 +648,23 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
for(js = n_from; js < n_to; js += GEMM_R * nthreads){
n = n_to - js;
if (n > GEMM_R * nthreads) n = GEMM_R * nthreads;
-
+
range_N[0] = js;
num_cpu_n = 0;
while (n > 0){
-
+
width = blas_quickdivide(n + nthreads - num_cpu_n - 1, nthreads - num_cpu_n);
-
+
n -= width;
if (n < 0) width = width + n;
-
+
range_N[num_cpu_n + 1] = range_N[num_cpu_n] + width;
-
+
num_cpu_n ++;
}
-
+
for (j = 0; j < num_cpu_m; j++) {
for (i = 0; i < num_cpu_m; i++) {
for (k = 0; k < DIVIDE_RATE; k++) {
@@ -672,7 +672,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
}
}
}
-
+
queue[num_cpu_m - 1].next = NULL;
exec_blas(num_cpu_m, queue);
@@ -692,9 +692,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG nthreads = args -> nthreads;
BLASLONG divN, divT;
int mode;
-
+
if (nthreads == 1) {
- GEMM_LOCAL(args, range_m, range_n, sa, sb, 0);
+ GEMM_LOCAL(args, range_m, range_n, sa, sb, 0);
return 0;
}
@@ -745,7 +745,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
-#endif
+#endif
#else
#ifdef XDOUBLE
mode = BLAS_XDOUBLE | BLAS_COMPLEX;
@@ -753,9 +753,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
-#endif
#endif
-
+#endif
+
#if defined(TN) || defined(TT) || defined(TR) || defined(TC) || \
defined(CN) || defined(CT) || defined(CR) || defined(CC)
mode |= (BLAS_TRANSA_T);
@@ -764,11 +764,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
defined(NC) || defined(TC) || defined(RC) || defined(CC)
mode |= (BLAS_TRANSB_T);
#endif
-
+
#ifdef OS_WINDOWS
- gemm_thread_n(mode, args, range_m, range_n, GEMM_LOCAL, sa, sb, divN);
+ gemm_thread_n(mode, args, range_m, range_n, GEMM_LOCAL, sa, sb, divN);
#else
- gemm_thread_n(mode, args, range_m, range_n, gemm_driver, sa, sb, divN);
+ gemm_thread_n(mode, args, range_m, range_n, gemm_driver, sa, sb, divN);
#endif
}
diff --git a/driver/level3/syr2k_k.c b/driver/level3/syr2k_k.c
index 01251d483..8df0f122f 100644
--- a/driver/level3/syr2k_k.c
+++ b/driver/level3/syr2k_k.c
@@ -78,7 +78,7 @@ static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLA
#else
- SCAL_K(MIN(m_to - i + m_from - n_from, m_to), 0, 0, alpha[0],
+ SCAL_K(MIN(m_to - i + m_from - n_from, m_to), 0, 0, alpha[0],
#ifdef COMPLEX
alpha[1],
#endif
diff --git a/driver/level3/syr2k_kernel.c b/driver/level3/syr2k_kernel.c
index 8c476f50c..f9e4a4cda 100644
--- a/driver/level3/syr2k_kernel.c
+++ b/driver/level3/syr2k_kernel.c
@@ -56,7 +56,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
return 0;
}
@@ -68,7 +68,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
return 0;
}
@@ -81,7 +81,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
b += offset * k * COMPSIZE;
c += offset * ldc * COMPSIZE;
@@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#endif
a,
b + (m + offset) * k * COMPSIZE,
- c + (m + offset) * ldc * COMPSIZE, ldc);
+ c + (m + offset) * ldc * COMPSIZE, ldc);
#endif
n = m + offset;
@@ -115,7 +115,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
a -= offset * k * COMPSIZE;
c -= offset * COMPSIZE;
@@ -134,53 +134,53 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#endif
a + (n - offset) * k * COMPSIZE,
b,
- c + (n - offset) * COMPSIZE, ldc);
+ c + (n - offset) * COMPSIZE, ldc);
#endif
m = n + offset;
if (m <= 0) return 0;
}
for (loop = 0; loop < n; loop += GEMM_UNROLL_MN) {
-
+
int mm, nn;
-
+
mm = (loop & ~(GEMM_UNROLL_MN - 1));
nn = MIN(GEMM_UNROLL_MN, n - loop);
-
+
#ifndef LOWER
GEMM_KERNEL_N(mm, nn, k,
alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
+ a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
#endif
-
+
if (flag) {
- GEMM_BETA(nn, nn, 0, ZERO,
+ GEMM_BETA(nn, nn, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
NULL, 0, NULL, 0, subbuffer, nn);
-
+
GEMM_KERNEL_N(nn, nn, k,
alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
+ a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
#ifndef LOWER
-
+
for (j = 0; j < nn; j ++) {
for (i = 0; i <= j; i ++) {
#ifndef COMPLEX
c[i + loop + (j + loop) * ldc] +=
subbuffer[i + j * nn] + subbuffer[j + i * nn];
#else
- c[(i + loop + (j + loop) * ldc) * 2 + 0] +=
+ c[(i + loop + (j + loop) * ldc) * 2 + 0] +=
subbuffer[(i + j * nn) * 2 + 0] + subbuffer[(j + i * nn) * 2 + 0];
- c[(i + loop + (j + loop) * ldc) * 2 + 1] +=
+ c[(i + loop + (j + loop) * ldc) * 2 + 1] +=
subbuffer[(i + j * nn) * 2 + 1] + subbuffer[(j + i * nn) * 2 + 1];
#endif
}
@@ -189,7 +189,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
for (j = 0; j < nn; j ++) {
for (i = j; i < nn; i ++) {
#ifndef COMPLEX
- c[i + loop + (j + loop) * ldc] +=
+ c[i + loop + (j + loop) * ldc] +=
subbuffer[i + j * nn] + subbuffer[j + i * nn];
#else
c[(i + loop + (j + loop) * ldc) * 2 + 0] +=
@@ -201,15 +201,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
}
#endif
}
-
+
#ifdef LOWER
GEMM_KERNEL_N(m - mm - nn, nn, k,
alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
- c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
+ a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
+ c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
#endif
}
diff --git a/driver/level3/syrk_k.c b/driver/level3/syrk_k.c
index 9c9700ef3..08751dc8b 100644
--- a/driver/level3/syrk_k.c
+++ b/driver/level3/syrk_k.c
@@ -80,7 +80,7 @@ static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLA
#else
- SCAL_K(MIN(m_to - i + m_from - n_from, m_to), 0, 0, alpha[0],
+ SCAL_K(MIN(m_to - i + m_from - n_from, m_to), 0, 0, alpha[0],
#ifdef COMPLEX
alpha[1],
#endif
diff --git a/driver/level3/syrk_kernel.c b/driver/level3/syrk_kernel.c
index 65d108a49..434d2f630 100644
--- a/driver/level3/syrk_kernel.c
+++ b/driver/level3/syrk_kernel.c
@@ -71,7 +71,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
return 0;
}
@@ -83,7 +83,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
return 0;
}
@@ -95,7 +95,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
b += offset * k * COMPSIZE;
c += offset * ldc * COMPSIZE;
@@ -114,7 +114,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#endif
a,
b + (m + offset) * k * COMPSIZE,
- c + (m + offset) * ldc * COMPSIZE, ldc);
+ c + (m + offset) * ldc * COMPSIZE, ldc);
#endif
n = m + offset;
@@ -128,7 +128,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
a -= offset * k * COMPSIZE;
c -= offset * COMPSIZE;
@@ -147,7 +147,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#endif
a + (n - offset) * k * COMPSIZE,
b,
- c + (n - offset) * COMPSIZE, ldc);
+ c + (n - offset) * COMPSIZE, ldc);
#endif
m = n + offset;
@@ -167,21 +167,21 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
+ a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
#endif
- GEMM_BETA(nn, nn, 0, ZERO,
+ GEMM_BETA(nn, nn, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
NULL, 0, NULL, 0, subbuffer, nn);
-
+
GEMM_KERNEL(nn, nn, k,
alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
+ a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
cc = c + (loop + loop * ldc) * COMPSIZE;
ss = subbuffer;
@@ -220,8 +220,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
- c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
+ a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
+ c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
#endif
}
diff --git a/driver/level3/syrk_thread.c b/driver/level3/syrk_thread.c
index 837670b9f..0d9bdf209 100644
--- a/driver/level3/syrk_thread.c
+++ b/driver/level3/syrk_thread.c
@@ -52,7 +52,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
int num_cpu;
int mask = 0;
-
+
if (!(mode & BLAS_COMPLEX)) {
switch (mode & BLAS_PREC) {
@@ -83,7 +83,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
#endif
}
}
-
+
n_from = 0;
n_to = arg -> n;
@@ -96,29 +96,29 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
nf = (double)(n_from);
nt = (double)(n_to);
-
+
dnum = (nt * nt - nf * nf) / (double)nthreads;
-
+
num_cpu = 0;
-
+
range[0] = n_from;
i = n_from;
-
+
while (i < n_to){
-
+
if (nthreads - num_cpu > 1) {
-
+
di = (double)i;
width = ((BLASLONG)( sqrt(di * di + dnum) - di) + mask) & ~mask;
-
+
if ((width <= 0) || (width > n_to - i)) width = n_to - i;
-
+
} else {
width = n_to - i;
}
-
+
range[num_cpu + 1] = range[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = function;
queue[num_cpu].args = arg;
@@ -127,38 +127,38 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
-
+
} else {
nf = (double)(arg -> n - n_from);
nt = (double)(arg -> n - n_to);
dnum = (nt * nt - nf * nf) / (double)nthreads;
-
+
num_cpu = 0;
-
+
range[0] = n_from;
i = n_from;
-
+
while (i < n_to){
-
+
if (nthreads - num_cpu > 1) {
-
+
di = (double)(arg -> n - i);
width = ((BLASLONG)(-sqrt(di * di + dnum) + di) + mask) & ~mask;
-
+
if ((width <= 0) || (width > n_to - i)) width = n_to - i;
-
+
} else {
width = n_to - i;
}
-
+
range[num_cpu + 1] = range[num_cpu] + width;
-
+
queue[num_cpu].mode = mode;
queue[num_cpu].routine = function;
queue[num_cpu].args = arg;
@@ -167,7 +167,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
queue[num_cpu].sa = NULL;
queue[num_cpu].sb = NULL;
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
num_cpu ++;
i += width;
}
@@ -178,9 +178,9 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
queue[0].sa = sa;
queue[0].sb = sb;
queue[num_cpu - 1].next = NULL;
-
+
exec_blas(num_cpu, queue);
}
-
+
return 0;
}
diff --git a/driver/level3/trmm_L.c b/driver/level3/trmm_L.c
index 9e46df05c..c0a822b51 100644
--- a/driver/level3/trmm_L.c
+++ b/driver/level3/trmm_L.c
@@ -122,7 +122,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = min_l;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -130,7 +130,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
TRMM_ILNCOPY(min_l, min_i, a, lda, 0, 0, sa);
#endif
-
+
STOP_RPCC(innercost);
for(jjs = js; jjs < js + min_j; jjs += min_jj){
@@ -140,16 +140,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
START_RPCC();
GEMM_ONCOPY(min_l, min_jj, b + (jjs * ldb) * COMPSIZE, ldb, sb + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
-
+
TRMM_KERNEL_N(min_i, min_jj, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb + min_l * (jjs - js) * COMPSIZE, b + (jjs * ldb) * COMPSIZE, ldb, 0);
+ sa, sb + min_l * (jjs - js) * COMPSIZE, b + (jjs * ldb) * COMPSIZE, ldb, 0);
STOP_RPCC(trmmcost);
}
@@ -158,7 +158,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(is = min_i; is < min_l; is += GEMM_P){
min_i = min_l - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -166,16 +166,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
TRMM_ILNCOPY(min_l, min_i, a, lda, 0, is, sa);
#endif
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
-
+
TRMM_KERNEL_N(min_i, min_j, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is);
STOP_RPCC(trmmcost);
@@ -186,7 +186,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = ls;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -200,21 +200,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
START_RPCC();
GEMM_ONCOPY(min_l, min_jj, b + (ls + jjs * ldb) * COMPSIZE, ldb, sb + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(gemmcost);
-
+
START_RPCC();
- GEMM_KERNEL(min_i, min_jj, min_l, dp1,
+ GEMM_KERNEL(min_i, min_jj, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb + min_l * (jjs - js) * COMPSIZE,
- b + (jjs * ldb) * COMPSIZE, ldb);
+ sa, sb + min_l * (jjs - js) * COMPSIZE,
+ b + (jjs * ldb) * COMPSIZE, ldb);
STOP_RPCC(gemmcost);
}
@@ -222,7 +222,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(is = min_i; is < ls; is += GEMM_P){
min_i = ls - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -235,19 +235,19 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
START_RPCC();
- GEMM_KERNEL(min_i, min_j, min_l, dp1,
+ GEMM_KERNEL(min_i, min_j, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
STOP_RPCC(gemmcost);
}
-
+
for(is = ls; is < ls + min_l; is += GEMM_P){
min_i = ls + min_l - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -255,7 +255,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
TRMM_ILNCOPY(min_l, min_i, a, lda, ls, is, sa);
#endif
-
+
STOP_RPCC(innercost);
START_RPCC();
@@ -264,7 +264,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is - ls);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is - ls);
STOP_RPCC(trmmcost);
}
@@ -275,7 +275,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = min_l;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -283,20 +283,20 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
TRMM_IUNCOPY(min_l, min_i, a, lda, m - min_l, m - min_l, sa);
#endif
-
+
STOP_RPCC(innercost);
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
START_RPCC();
GEMM_ONCOPY(min_l, min_jj, b + (m - min_l + jjs * ldb) * COMPSIZE, ldb,
sb + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(outercost);
-
+
START_RPCC();
TRMM_KERNEL_T(min_i, min_jj, min_l, dp1,
@@ -304,7 +304,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
ZERO,
#endif
sa, sb + min_l * (jjs - js) * COMPSIZE,
- b + (m - min_l + jjs * ldb) * COMPSIZE, ldb, 0);
+ b + (m - min_l + jjs * ldb) * COMPSIZE, ldb, 0);
STOP_RPCC(trmmcost);
}
@@ -312,7 +312,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(is = m - min_l + min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -320,16 +320,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
TRMM_IUNCOPY(min_l, min_i, a, lda, m - min_l, is, sa);
#endif
-
+
STOP_RPCC(innercost);
-
+
START_RPCC();
TRMM_KERNEL_T(min_i, min_j, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is - m + min_l);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is - m + min_l);
STOP_RPCC(trmmcost);
}
@@ -339,7 +339,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = min_l;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -347,18 +347,18 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
TRMM_IUNCOPY(min_l, min_i, a, lda, ls - min_l, ls - min_l, sa);
#endif
-
+
STOP_RPCC(innercost);
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
START_RPCC();
GEMM_ONCOPY(min_l, min_jj, b + (ls - min_l + jjs * ldb) * COMPSIZE, ldb,
sb + min_l * (jjs - js) * COMPSIZE);
-
+
STOP_RPCC(outercost);
START_RPCC();
@@ -368,7 +368,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
ZERO,
#endif
sa, sb + min_l * (jjs - js) * COMPSIZE,
- b + (ls - min_l + jjs * ldb) * COMPSIZE, ldb, 0);
+ b + (ls - min_l + jjs * ldb) * COMPSIZE, ldb, 0);
STOP_RPCC(trmmcost);
}
@@ -376,7 +376,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(is = ls - min_l + min_i; is < ls; is += GEMM_P){
min_i = ls - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -384,7 +384,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
TRMM_IUNCOPY(min_l, min_i, a, lda, ls - min_l, is, sa);
#endif
-
+
STOP_RPCC(innercost);
START_RPCC();
@@ -393,7 +393,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is - ls + min_l);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is - ls + min_l);
STOP_RPCC(trmmcost);
}
@@ -402,7 +402,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(is = ls; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
START_RPCC();
#ifndef TRANSA
@@ -415,11 +415,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
START_RPCC();
- GEMM_KERNEL(min_i, min_j, min_l, dp1,
+ GEMM_KERNEL(min_i, min_j, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
STOP_RPCC(gemmcost);
}
diff --git a/driver/level3/trmm_R.c b/driver/level3/trmm_R.c
index e46553c3f..6012386c8 100644
--- a/driver/level3/trmm_R.c
+++ b/driver/level3/trmm_R.c
@@ -114,9 +114,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = m;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (ls * ldb) * COMPSIZE, ldb, sa);
-
+
for(jjs = 0; jjs < ls - js; jjs += min_jj){
min_jj = ls - js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
@@ -126,54 +126,54 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
GEMM_OTCOPY(min_l, min_jj, a + ((js + jjs) + ls * lda) * COMPSIZE, lda, sb + min_l * jjs * COMPSIZE);
#endif
-
- GEMM_KERNEL(min_i, min_jj, min_l, dp1,
+
+ GEMM_KERNEL(min_i, min_jj, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa, sb + min_l * jjs * COMPSIZE,
- b + ((js + jjs) * ldb) * COMPSIZE, ldb);
+ b + ((js + jjs) * ldb) * COMPSIZE, ldb);
}
for(jjs = 0; jjs < min_l; jjs += min_jj){
min_jj = min_l - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
#ifndef TRANSA
TRMM_OLNCOPY(min_l, min_jj, a, lda, ls, ls + jjs, sb + min_l * (ls - js + jjs) * COMPSIZE);
#else
TRMM_OUTCOPY(min_l, min_jj, a, lda, ls, ls + jjs, sb + min_l * (ls - js + jjs) * COMPSIZE);
#endif
-
+
TRMM_KERNEL_T(min_i, min_jj, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa,
sb + (ls - js + jjs) * min_l * COMPSIZE,
- b + ((ls + jjs) * ldb) * COMPSIZE, ldb, -jjs);
+ b + ((ls + jjs) * ldb) * COMPSIZE, ldb, -jjs);
}
for(is = min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (is + ls * ldb) * COMPSIZE, ldb, sa);
-
- GEMM_KERNEL(min_i, ls - js, min_l, dp1,
+
+ GEMM_KERNEL(min_i, ls - js, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa, sb,
- b + (is + js * ldb) * COMPSIZE, ldb);
-
+ b + (is + js * ldb) * COMPSIZE, ldb);
+
TRMM_KERNEL_T(min_i, min_l, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa,
sb + (ls - js) * min_l * COMPSIZE,
- b + (is + ls * ldb) * COMPSIZE, ldb, 0);
+ b + (is + ls * ldb) * COMPSIZE, ldb, 0);
}
}
@@ -183,9 +183,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = m;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (ls * ldb) * COMPSIZE, ldb, sa);
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
@@ -195,26 +195,26 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
GEMM_OTCOPY(min_l, min_jj, a + (jjs + ls * lda) * COMPSIZE, lda, sb + min_l * (jjs - js) * COMPSIZE);
#endif
-
- GEMM_KERNEL(min_i, min_jj, min_l, dp1,
+
+ GEMM_KERNEL(min_i, min_jj, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb + min_l * (jjs - js) * COMPSIZE,
- b + (jjs * ldb) * COMPSIZE, ldb);
+ sa, sb + min_l * (jjs - js) * COMPSIZE,
+ b + (jjs * ldb) * COMPSIZE, ldb);
}
-
+
for(is = min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (is + ls * ldb) * COMPSIZE, ldb, sa);
-
- GEMM_KERNEL(min_i, min_j, min_l, dp1,
+
+ GEMM_KERNEL(min_i, min_j, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
}
}
}
@@ -225,7 +225,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(js = n; js > 0; js -= GEMM_R){
min_j = js;
if (min_j > GEMM_R) min_j = GEMM_R;
-
+
start_ls = js - min_j;
while (start_ls + GEMM_Q < js) start_ls += GEMM_Q;
@@ -234,7 +234,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = m;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (ls * ldb) * COMPSIZE, ldb, sa);
for(jjs = 0; jjs < min_l; jjs += min_jj){
@@ -246,20 +246,20 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#else
TRMM_OLTCOPY(min_l, min_jj, a, lda, ls, ls + jjs, sb + min_l * jjs * COMPSIZE);
#endif
-
+
TRMM_KERNEL_N(min_i, min_jj, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa,
sb + min_l * jjs * COMPSIZE,
- b + ((ls + jjs) * ldb) * COMPSIZE, ldb, -jjs);
+ b + ((ls + jjs) * ldb) * COMPSIZE, ldb, -jjs);
}
-
+
for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){
min_jj = js - ls - min_l - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
#ifndef TRANSA
GEMM_ONCOPY(min_l, min_jj, a + (ls + (ls + min_l + jjs) * lda) * COMPSIZE, lda,
sb + min_l * (min_l + jjs) * COMPSIZE);
@@ -267,20 +267,20 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
GEMM_OTCOPY(min_l, min_jj, a + ((ls + min_l + jjs) + ls * lda) * COMPSIZE, lda,
sb + min_l * (min_l + jjs) * COMPSIZE);
#endif
-
- GEMM_KERNEL(min_i, min_jj, min_l, dp1,
+
+ GEMM_KERNEL(min_i, min_jj, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa,
sb + min_l * (min_l + jjs) * COMPSIZE,
- b + ((ls + min_l + jjs) * ldb) * COMPSIZE, ldb);
+ b + ((ls + min_l + jjs) * ldb) * COMPSIZE, ldb);
}
-
+
for(is = min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (is + ls * ldb) * COMPSIZE, ldb, sa);
TRMM_KERNEL_N(min_i, min_l, min_l, dp1,
@@ -289,16 +289,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#endif
sa,
sb,
- b + (is + ls * ldb) * COMPSIZE, ldb, 0);
+ b + (is + ls * ldb) * COMPSIZE, ldb, 0);
if (js - ls - min_l > 0) {
- GEMM_KERNEL(min_i, js - ls - min_l, min_l, dp1,
+ GEMM_KERNEL(min_i, js - ls - min_l, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
sa,
sb + min_l * min_l * COMPSIZE,
- b + (is + (ls + min_l) * ldb) * COMPSIZE, ldb);
+ b + (is + (ls + min_l) * ldb) * COMPSIZE, ldb);
}
}
}
@@ -308,38 +308,38 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = m;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (ls * ldb) * COMPSIZE, ldb, sa);
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
#ifndef TRANSA
GEMM_ONCOPY(min_l, min_jj, a + (ls + (jjs - min_j) * lda) * COMPSIZE, lda, sb + min_l * (jjs - js) * COMPSIZE);
#else
GEMM_OTCOPY(min_l, min_jj, a + ((jjs - min_j) + ls * lda) * COMPSIZE, lda, sb + min_l * (jjs - js) * COMPSIZE);
#endif
-
- GEMM_KERNEL(min_i, min_jj, min_l, dp1,
+
+ GEMM_KERNEL(min_i, min_jj, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb + min_l * (jjs - js) * COMPSIZE,
- b + ((jjs - min_j) * ldb) * COMPSIZE, ldb);
+ sa, sb + min_l * (jjs - js) * COMPSIZE,
+ b + ((jjs - min_j) * ldb) * COMPSIZE, ldb);
}
-
+
for(is = min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (is + ls * ldb) * COMPSIZE, ldb, sa);
-
- GEMM_KERNEL(min_i, min_j, min_l, dp1,
+
+ GEMM_KERNEL(min_i, min_j, min_l, dp1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + (js - min_j) * ldb) * COMPSIZE, ldb);
+ sa, sb, b + (is + (js - min_j) * ldb) * COMPSIZE, ldb);
}
}
}
diff --git a/driver/level3/trsm_L.c b/driver/level3/trsm_L.c
index 2c3006f09..fa3b0d580 100644
--- a/driver/level3/trsm_L.c
+++ b/driver/level3/trsm_L.c
@@ -112,20 +112,20 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(js = 0; js < n; js += GEMM_R){
min_j = n - js;
if (min_j > GEMM_R) min_j = GEMM_R;
-
+
#if (!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))
for(ls = 0; ls < m; ls += GEMM_Q){
min_l = m - ls;
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = min_l;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
#ifndef TRANSA
TRSM_ILTCOPY(min_l, min_i, a + (ls + ls * lda) * COMPSIZE, lda, 0, sa);
#else
TRSM_IUNCOPY(min_l, min_i, a + (ls + ls * lda) * COMPSIZE, lda, 0, sa);
#endif
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
@@ -136,43 +136,43 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#ifdef COMPLEX
ZERO,
#endif
- sa, sb + min_l * (jjs - js) * COMPSIZE,
+ sa, sb + min_l * (jjs - js) * COMPSIZE,
b + (ls + jjs * ldb) * COMPSIZE, ldb, 0);
}
for(is = ls + min_i; is < ls + min_l; is += GEMM_P){
min_i = ls + min_l - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
#ifndef TRANSA
TRSM_ILTCOPY(min_l, min_i, a + (is + ls * lda) * COMPSIZE, lda, is - ls, sa);
#else
TRSM_IUNCOPY(min_l, min_i, a + (ls + is * lda) * COMPSIZE, lda, is - ls, sa);
#endif
-
+
TRSM_KERNEL(min_i, min_j, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa, sb, b + (is + js * ldb) * COMPSIZE, ldb, is - ls);
}
-
+
for(is = ls + min_l; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
#ifndef TRANSA
GEMM_ITCOPY(min_l, min_i, a + (is + ls * lda) * COMPSIZE, lda, sa);
#else
GEMM_INCOPY(min_l, min_i, a + (ls + is * lda) * COMPSIZE, lda, sa);
#endif
-
- GEMM_KERNEL(min_i, min_j, min_l, dm1,
+
+ GEMM_KERNEL(min_i, min_j, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
}
}
#else
@@ -197,19 +197,19 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
GEMM_ONCOPY(min_l, min_jj, b + (ls - min_l + jjs * ldb) * COMPSIZE, ldb, sb + min_l * (jjs - js) * COMPSIZE);
-
+
TRSM_KERNEL(min_i, min_jj, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb + min_l * (jjs - js) * COMPSIZE,
+ sa, sb + min_l * (jjs - js) * COMPSIZE,
b + (start_is + jjs * ldb) * COMPSIZE, ldb, start_is - ls + min_l);
}
-
+
for(is = start_is - GEMM_P; is >= ls - min_l; is -= GEMM_P){
min_i = ls - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
#ifndef TRANSA
TRSM_IUTCOPY(min_l, min_i, a + (is + (ls - min_l) * lda) * COMPSIZE, lda, is - (ls - min_l), sa);
#else
@@ -219,26 +219,26 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#ifdef COMPLEX
ZERO,
#endif
- sa, sb,
+ sa, sb,
b + (is + js * ldb) * COMPSIZE, ldb, + is - (ls - min_l) );
}
-
+
for(is = 0; is < ls - min_l; is += GEMM_P){
min_i = ls - min_l - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
#ifndef TRANSA
GEMM_ITCOPY(min_l, min_i, a + (is + (ls - min_l) * lda) * COMPSIZE, lda, sa);
#else
GEMM_INCOPY(min_l, min_i, a + ((ls - min_l) + is * lda) * COMPSIZE, lda, sa);
#endif
- GEMM_KERNEL(min_i, min_j, min_l, dm1,
+ GEMM_KERNEL(min_i, min_j, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
}
}
diff --git a/driver/level3/trsm_R.c b/driver/level3/trsm_R.c
index 0964d7860..b6ee95654 100644
--- a/driver/level3/trsm_R.c
+++ b/driver/level3/trsm_R.c
@@ -112,15 +112,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
for(js = 0; js < n; js += GEMM_R){
min_j = n - js;
if (min_j > GEMM_R) min_j = GEMM_R;
-
+
for(ls = 0; ls < js; ls += GEMM_Q){
min_l = js - ls;
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = m;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (ls * ldb) * COMPSIZE, ldb, sa);
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
@@ -131,25 +131,25 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
GEMM_OTCOPY(min_l, min_jj, a + (jjs + ls * lda) * COMPSIZE, lda, sb + min_l * (jjs - js) * COMPSIZE);
#endif
- GEMM_KERNEL(min_i, min_jj, min_l, dm1,
+ GEMM_KERNEL(min_i, min_jj, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa, sb + min_l * (jjs - js) * COMPSIZE,
- b + (jjs * ldb) * COMPSIZE, ldb);
+ b + (jjs * ldb) * COMPSIZE, ldb);
}
for(is = min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (is + ls * ldb) * COMPSIZE, ldb, sa);
-
- GEMM_KERNEL(min_i, min_j, min_l, dm1,
+
+ GEMM_KERNEL(min_i, min_j, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
+ sa, sb, b + (is + js * ldb) * COMPSIZE, ldb);
}
}
@@ -160,25 +160,25 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_i > GEMM_P) min_i = GEMM_P;
GEMM_ITCOPY(min_l, min_i, b + (ls * ldb) * COMPSIZE, ldb, sa);
-
+
#ifndef TRANSA
TRSM_OUNCOPY(min_l, min_l, a + (ls + ls * lda) * COMPSIZE, lda, 0, sb);
#else
TRSM_OLTCOPY(min_l, min_l, a + (ls + ls * lda) * COMPSIZE, lda, 0, sb);
#endif
-
+
TRSM_KERNEL(min_i, min_l, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa,
- sb,
+ sb,
b + (ls * ldb) * COMPSIZE, ldb, 0);
-
+
for(jjs = 0; jjs < min_j - min_l - ls + js; jjs += min_jj){
min_jj = min_j - min_l - ls + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
#ifndef TRANSA
GEMM_ONCOPY (min_l, min_jj, a + (ls + (ls + min_l + jjs) * lda) * COMPSIZE, lda,
sb + min_l * (min_l + jjs) * COMPSIZE);
@@ -187,36 +187,36 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
sb + min_l * (min_l + jjs) * COMPSIZE);
#endif
- GEMM_KERNEL(min_i, min_jj, min_l, dm1,
+ GEMM_KERNEL(min_i, min_jj, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa,
+ sa,
sb + min_l * (min_l + jjs) * COMPSIZE,
- b + (min_l + ls + jjs) * ldb * COMPSIZE, ldb);
+ b + (min_l + ls + jjs) * ldb * COMPSIZE, ldb);
}
for(is = min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (is + ls * ldb) * COMPSIZE, ldb, sa);
-
+
TRSM_KERNEL(min_i, min_l, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa,
- sb,
+ sb,
b + (is + ls * ldb) * COMPSIZE, ldb, 0);
-
- GEMM_KERNEL(min_i, min_j - min_l + js - ls, min_l, dm1,
+
+ GEMM_KERNEL(min_i, min_j - min_l + js - ls, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa,
+ sa,
sb + min_l * min_l * COMPSIZE,
- b + (is + ( min_l + ls) * ldb) * COMPSIZE, ldb);
+ b + (is + ( min_l + ls) * ldb) * COMPSIZE, ldb);
}
}
}
@@ -235,48 +235,48 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_i > GEMM_P) min_i = GEMM_P;
GEMM_ITCOPY(min_l, min_i, b + (ls * ldb) * COMPSIZE, ldb, sa);
-
+
for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
#ifndef TRANSA
GEMM_ONCOPY(min_l, min_jj, a + (ls + (jjs - min_j) * lda) * COMPSIZE, lda, sb + min_l * (jjs - js) * COMPSIZE);
#else
GEMM_OTCOPY(min_l, min_jj, a + ((jjs - min_j) + ls * lda) * COMPSIZE, lda, sb + min_l * (jjs - js) * COMPSIZE);
#endif
- GEMM_KERNEL(min_i, min_jj, min_l, dm1,
+ GEMM_KERNEL(min_i, min_jj, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa, sb + min_l * (jjs - js) * COMPSIZE,
- b + (jjs - min_j) * ldb * COMPSIZE, ldb);
+ b + (jjs - min_j) * ldb * COMPSIZE, ldb);
}
for(is = min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (is + ls * ldb) * COMPSIZE, ldb, sa);
-
- GEMM_KERNEL(min_i, min_j, min_l, dm1,
+
+ GEMM_KERNEL(min_i, min_j, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa, sb, b + (is + (js - min_j) * ldb) * COMPSIZE, ldb);
+ sa, sb, b + (is + (js - min_j) * ldb) * COMPSIZE, ldb);
}
}
start_ls = js - min_j;
while (start_ls + GEMM_Q < js) start_ls += GEMM_Q;
-
+
for(ls = start_ls; ls >= js - min_j; ls -= GEMM_Q){
min_l = js - ls;
if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = m;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (ls * ldb) * COMPSIZE, ldb, sa);
#ifndef TRANSA
@@ -286,63 +286,63 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
TRSM_OUTCOPY(min_l, min_l, a + (ls + ls * lda) * COMPSIZE, lda,
0, sb + min_l * (min_j - js + ls) * COMPSIZE);
#endif
-
+
TRSM_KERNEL(min_i, min_l, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa,
- sb + min_l * (min_j - js + ls) * COMPSIZE,
+ sb + min_l * (min_j - js + ls) * COMPSIZE,
b + (ls * ldb) * COMPSIZE, ldb, 0);
-
+
for(jjs = 0; jjs < min_j - js + ls; jjs += min_jj){
min_jj = min_j - js + ls - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
-
+
#ifndef TRANSA
GEMM_ONCOPY (min_l, min_jj, a + (ls + (js - min_j + jjs) * lda) * COMPSIZE, lda,
sb + min_l * jjs * COMPSIZE);
#else
- GEMM_OTCOPY (min_l, min_jj, a + ((js - min_j + jjs) + ls * lda) * COMPSIZE, lda,
+ GEMM_OTCOPY (min_l, min_jj, a + ((js - min_j + jjs) + ls * lda) * COMPSIZE, lda,
sb + min_l * jjs * COMPSIZE);
#endif
-
- GEMM_KERNEL(min_i, min_jj, min_l, dm1,
+
+ GEMM_KERNEL(min_i, min_jj, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa,
+ sa,
sb + min_l * jjs * COMPSIZE,
- b + (js - min_j + jjs) * ldb * COMPSIZE, ldb);
+ b + (js - min_j + jjs) * ldb * COMPSIZE, ldb);
}
for(is = min_i; is < m; is += GEMM_P){
min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P;
-
+
GEMM_ITCOPY(min_l, min_i, b + (is + ls * ldb) * COMPSIZE, ldb, sa);
-
+
TRSM_KERNEL(min_i, min_l, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
sa,
- sb + min_l * (min_j - js + ls) * COMPSIZE,
+ sb + min_l * (min_j - js + ls) * COMPSIZE,
b + (is + ls * ldb) * COMPSIZE, ldb, 0);
-
- GEMM_KERNEL(min_i, min_j - js + ls, min_l, dm1,
+
+ GEMM_KERNEL(min_i, min_j - js + ls, min_l, dm1,
#ifdef COMPLEX
ZERO,
#endif
- sa,
+ sa,
sb,
- b + (is + (js - min_j) * ldb) * COMPSIZE, ldb);
+ b + (is + (js - min_j) * ldb) * COMPSIZE, ldb);
}
}
}
-
+
#endif
-
+
return 0;
}
diff --git a/driver/level3/zher2k_k.c b/driver/level3/zher2k_k.c
index 93bb781f1..54c76d7f5 100644
--- a/driver/level3/zher2k_k.c
+++ b/driver/level3/zher2k_k.c
@@ -130,7 +130,7 @@ static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLA
SCAL_K(MIN(i + n_from - m_from + 1, m_to) * COMPSIZE, 0, 0, alpha[0], c, 1, NULL, 0, NULL, 0);
- if (i + n_from - m_from + 1 <= m_to)
+ if (i + n_from - m_from + 1 <= m_to)
*(c + (i + n_from - m_from) * COMPSIZE + 1) = ZERO;
c += ldc * COMPSIZE;
diff --git a/driver/level3/zher2k_kernel.c b/driver/level3/zher2k_kernel.c
index 9b4c45033..92aef8880 100644
--- a/driver/level3/zher2k_kernel.c
+++ b/driver/level3/zher2k_kernel.c
@@ -61,7 +61,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
return 0;
}
@@ -73,7 +73,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
return 0;
}
@@ -86,7 +86,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
b += offset * k * COMPSIZE;
c += offset * ldc * COMPSIZE;
@@ -105,7 +105,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
#endif
a,
b + (m + offset) * k * COMPSIZE,
- c + (m + offset) * ldc * COMPSIZE, ldc);
+ c + (m + offset) * ldc * COMPSIZE, ldc);
#endif
n = m + offset;
@@ -120,7 +120,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
#ifdef COMPLEX
alpha_i,
#endif
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
a -= offset * k * COMPSIZE;
c -= offset * COMPSIZE;
@@ -139,30 +139,30 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
#endif
a + (n - offset) * k * COMPSIZE,
b,
- c + (n - offset) * COMPSIZE, ldc);
+ c + (n - offset) * COMPSIZE, ldc);
#endif
m = n + offset;
if (m <= 0) return 0;
}
for (loop = 0; loop < n; loop += GEMM_UNROLL_MN) {
-
+
int mm, nn;
-
+
mm = (loop & ~(GEMM_UNROLL_MN - 1));
nn = MIN(GEMM_UNROLL_MN, n - loop);
-
+
#ifndef LOWER
GEMM_KERNEL(mm, nn, k,
alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
+ a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
#endif
-
+
if (flag) {
- GEMM_BETA(nn, nn, 0, ZERO,
+ GEMM_BETA(nn, nn, 0, ZERO,
#ifdef COMPLEX
ZERO,
#endif
@@ -173,17 +173,17 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
#ifdef COMPLEX
alpha_i,
#endif
- a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
+ a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
#ifndef LOWER
-
+
for (j = 0; j < nn; j ++) {
for (i = 0; i <= j; i ++) {
- c[(i + loop + (j + loop) * ldc) * 2 + 0] +=
+ c[(i + loop + (j + loop) * ldc) * 2 + 0] +=
subbuffer[(i + j * nn) * 2 + 0] + subbuffer[(j + i * nn) * 2 + 0];
if (i != j) {
- c[(i + loop + (j + loop) * ldc) * 2 + 1] +=
+ c[(i + loop + (j + loop) * ldc) * 2 + 1] +=
subbuffer[(i + j * nn) * 2 + 1] - subbuffer[(j + i * nn) * 2 + 1];
} else {
c[(i + loop + (j + loop) * ldc) * 2 + 1] = ZERO;
@@ -205,15 +205,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
}
#endif
}
-
+
#ifdef LOWER
GEMM_KERNEL(m - mm - nn, nn, k,
alpha_r,
#ifdef COMPLEX
alpha_i,
#endif
- a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
- c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
+ a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
+ c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
#endif
}
diff --git a/driver/level3/zherk_k.c b/driver/level3/zherk_k.c
index d1ffbdb12..2203fc5c1 100644
--- a/driver/level3/zherk_k.c
+++ b/driver/level3/zherk_k.c
@@ -128,7 +128,7 @@ static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLA
SCAL_K(MIN(i + n_from - m_from + 1, m_to) * COMPSIZE, 0, 0, alpha[0], c, 1, NULL, 0, NULL, 0);
- if (i + n_from - m_from + 1 <= m_to)
+ if (i + n_from - m_from + 1 <= m_to)
*(c + (i + n_from - m_from) * COMPSIZE + 1) = ZERO;
c += ldc * COMPSIZE;
diff --git a/driver/level3/zherk_kernel.c b/driver/level3/zherk_kernel.c
index fd8ff9cf3..e4c9e27c4 100644
--- a/driver/level3/zherk_kernel.c
+++ b/driver/level3/zherk_kernel.c
@@ -59,7 +59,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifndef LOWER
GEMM_KERNEL(m, n, k,
alpha_r, ZERO,
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
return 0;
}
@@ -68,7 +68,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef LOWER
GEMM_KERNEL(m, n, k,
alpha_r, ZERO,
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
return 0;
}
@@ -78,7 +78,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef LOWER
GEMM_KERNEL(m, offset, k,
alpha_r, ZERO,
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
b += offset * k * COMPSIZE;
c += offset * ldc * COMPSIZE;
@@ -94,7 +94,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
alpha_r, ZERO,
a,
b + (m + offset) * k * COMPSIZE,
- c + (m + offset) * ldc * COMPSIZE, ldc);
+ c + (m + offset) * ldc * COMPSIZE, ldc);
#endif
n = m + offset;
@@ -106,7 +106,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifndef LOWER
GEMM_KERNEL(-offset, n, k,
alpha_r, ZERO,
- a, b, c, ldc);
+ a, b, c, ldc);
#endif
a -= offset * k * COMPSIZE;
c -= offset * COMPSIZE;
@@ -122,7 +122,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
alpha_r, ZERO,
a + (n - offset) * k * COMPSIZE,
b,
- c + (n - offset) * COMPSIZE, ldc);
+ c + (n - offset) * COMPSIZE, ldc);
#endif
m = n + offset;
if (m <= 0) return 0;
@@ -138,7 +138,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifndef LOWER
GEMM_KERNEL(mm, nn, k,
alpha_r, ZERO,
- a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
+ a, b + loop * k * COMPSIZE, c + loop * ldc * COMPSIZE, ldc);
#endif
GEMM_BETA(nn, nn, 0, ZERO, ZERO,
@@ -146,8 +146,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
GEMM_KERNEL(nn, nn, k,
alpha_r, ZERO,
- a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
-
+ a + loop * k * COMPSIZE, b + loop * k * COMPSIZE, subbuffer, nn);
+
cc = c + (loop + loop * ldc) * COMPSIZE;
ss = subbuffer;
@@ -158,7 +158,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
cc[i * 2 + 0] += ss[i * 2 + 0];
cc[i * 2 + 1] += ss[i * 2 + 1];
}
-
+
cc[j * 2 + 0] += ss[i * 2 + 0];
cc[j * 2 + 1] = ZERO;
@@ -184,8 +184,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r,
#ifdef LOWER
GEMM_KERNEL(m - mm - nn, nn, k,
alpha_r, ZERO,
- a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
- c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
+ a + (mm + nn) * k * COMPSIZE, b + loop * k * COMPSIZE,
+ c + (mm + nn + loop * ldc) * COMPSIZE, ldc);
#endif
}
diff --git a/driver/level3/zsyrk_beta.c b/driver/level3/zsyrk_beta.c
index eb0972975..3787e31b5 100644
--- a/driver/level3/zsyrk_beta.c
+++ b/driver/level3/zsyrk_beta.c
@@ -42,7 +42,7 @@
int CNAME(BLASLONG dummy1, BLASLONG n, BLASLONG dummy2, FLOAT alpha_r, FLOAT alpha_i,
FLOAT *dummy3, BLASLONG dummy4, FLOAT *dummy5, BLASLONG dummy6,
- FLOAT *c, BLASLONG ldc,
+ FLOAT *c, BLASLONG ldc,
FLOAT *dummy7, FLOAT *dummy8, BLASLONG from, BLASLONG to){
BLASLONG i;
diff --git a/driver/mapper/mapper.c b/driver/mapper/mapper.c
index 83805fb1e..bbf499fce 100644
--- a/driver/mapper/mapper.c
+++ b/driver/mapper/mapper.c
@@ -92,7 +92,7 @@ static int mapper_release(struct inode *inode, struct file *fp){
#ifdef CONFIG_BIGPHYS_AREA
bigphysarea_free_pages(buffer[pos].address);
#else
-
+
for (addr = buffer[pos].address; addr < buffer[pos].address + buffer[pos].size; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
}
@@ -121,7 +121,7 @@ int mapper_mapper(struct file *fp, struct vm_area_struct *vma){
all_length = vma->vm_end - vma->vm_start;
current_addr = vma -> vm_start;
-
+
spin_lock(&lock);
while (all_length > 0) {
@@ -133,56 +133,56 @@ int mapper_mapper(struct file *fp, struct vm_area_struct *vma){
pos = 0;
while ((pos < MAX_BUFF_SIZE) && (buffer[pos].address != 0)) pos ++;
-
+
if (pos >= MAX_BUFF_SIZE) {
-
+
printk("Memory Allocator : too much memory allocation requested.\n");
spin_unlock(&lock);
-
+
return -EIO;
}
-
+
#ifdef CONFIG_BIGPHYS_AREA
alloc_addr = (caddr_t)bigphysarea_alloc_pages(length >> PAGE_SHIFT, 1, GFP_KERNEL);
#else
alloc_addr = (caddr_t)kmalloc(length, GFP_KERNEL);
#endif
-
+
if (alloc_addr == (caddr_t)NULL) {
-
+
spin_unlock(&lock);
-
+
return -EIO;
}
-
+
#ifndef CONFIG_BIGPHYS_AREA
for (addr = alloc_addr; addr < alloc_addr + length; addr += PAGE_SIZE) {
clear_page(addr);
SetPageReserved(virt_to_page(addr));
}
#endif
-
+
if ((ret = remap_pfn_range(vma,
current_addr,
virt_to_phys((void *)alloc_addr) >> PAGE_SHIFT,
length,
PAGE_SHARED)) < 0) {
-
+
#ifdef CONFIG_BIGPHYS_AREA
bigphysarea_free_pages((caddr_t)alloc_addr);
#else
-
+
for (addr = alloc_addr; addr < alloc_addr + length; addr += PAGE_SIZE) ClearPageReserved(virt_to_page(addr));
-
+
kfree((caddr_t)alloc_addr);
#endif
-
+
spin_unlock(&lock);
-
+
return ret;
}
-
+
buffer[pos].pid = current -> tgid;
buffer[pos].address = alloc_addr;
#ifndef CONFIG_BIGPHYS_AREA
@@ -209,11 +209,11 @@ static int __init mapper_init(void){
int ret, i;
ret = alloc_chrdev_region(&mapper_dev, 0, 1, "mapper");
-
+
cdev_init(&mapper_cdev, &mapper_fops);
ret = cdev_add(&mapper_cdev, mapper_dev, 1);
-
+
spin_lock_init(&lock);
for (i = 0; i < MAX_BUFF_SIZE; i++) {
@@ -240,7 +240,7 @@ static void __exit mapper_exit(void){
#endif
}
}
-
+
cdev_del(&mapper_cdev);
unregister_chrdev_region(mapper_dev, 1);
diff --git a/driver/others/Makefile b/driver/others/Makefile
index ca05c5129..fc73871cc 100644
--- a/driver/others/Makefile
+++ b/driver/others/Makefile
@@ -1,14 +1,14 @@
TOPDIR = ../..
include ../../Makefile.system
-COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
+COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
ifdef SMP
-COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
+COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
ifndef NO_AFFINITY
-COMMONOBJS += init.$(SUFFIX)
+COMMONOBJS += init.$(SUFFIX)
endif
endif
@@ -56,13 +56,13 @@ ifeq ($(USE_OPENMP), 1)
BLAS_SERVER = blas_server_omp.c
else
ifeq ($(OSNAME), WINNT)
-BLAS_SERVER = blas_server_win32.c
+BLAS_SERVER = blas_server_win32.c
endif
ifeq ($(OSNAME), CYGWIN_NT)
-BLAS_SERVER = blas_server_win32.c
+BLAS_SERVER = blas_server_win32.c
endif
ifeq ($(OSNAME), Interix)
-BLAS_SERVER = blas_server_win32.c
+BLAS_SERVER = blas_server_win32.c
endif
endif
diff --git a/driver/others/blas_l1_thread.c b/driver/others/blas_l1_thread.c
index 851135b10..83fc26884 100644
--- a/driver/others/blas_l1_thread.c
+++ b/driver/others/blas_l1_thread.c
@@ -42,9 +42,9 @@
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
- void *b, BLASLONG ldb,
+ void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads){
-
+
blas_queue_t queue[MAX_CPU_NUMBER];
blas_arg_t args [MAX_CPU_NUMBER];
@@ -52,23 +52,23 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
int num_cpu, calc_type;
calc_type = (mode & BLAS_PREC) + ((mode & BLAS_COMPLEX) != 0) + 2;
-
+
mode |= BLAS_LEGACY;
for (i = 0; i < nthreads; i++) blas_queue_init(&queue[i]);
num_cpu = 0;
i = m;
-
+
while (i > 0){
-
+
/* Adjust Parameters */
width = blas_quickdivide(i + nthreads - num_cpu - 1,
nthreads - num_cpu);
i -= width;
if (i < 0) width = width + i;
-
+
astride = width * lda;
if (!(mode & BLAS_TRANSB_T)) {
@@ -95,10 +95,10 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
queue[num_cpu].routine = function;
queue[num_cpu].args = &args[num_cpu];
queue[num_cpu].next = &queue[num_cpu + 1];
-
+
a = (void *)((BLASULONG)a + astride);
b = (void *)((BLASULONG)b + bstride);
-
+
num_cpu ++;
}
diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c
index 1735ee931..95b5965e1 100644
--- a/driver/others/blas_server.c
+++ b/driver/others/blas_server.c
@@ -13,19 +13,19 @@ met:
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the ISCAS nor the names of its contributors may
- be used to endorse or promote products derived from this software
+ 3. Neither the name of the ISCAS nor the names of its contributors may
+ be used to endorse or promote products derived from this software
without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
@@ -178,8 +178,8 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
#ifdef EXPRECISION
if (mode & BLAS_XDOUBLE){
/* REAL / Extended Double */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble,
+ xdouble *, BLASLONG, xdouble *, BLASLONG,
xdouble *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -187,14 +187,14 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> a, args -> lda,
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
- } else
+ } else
#endif
if (mode & BLAS_DOUBLE){
/* REAL / Double */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
- double *, BLASLONG, double *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
+ double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *) = func;
-
+
afunc(args -> m, args -> n, args -> k,
((double *)args -> alpha)[0],
args -> a, args -> lda,
@@ -202,10 +202,10 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> c, args -> ldc, sb);
} else {
/* REAL / Single */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
- float *, BLASLONG, float *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
+ float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *) = func;
-
+
afunc(args -> m, args -> n, args -> k,
((float *)args -> alpha)[0],
args -> a, args -> lda,
@@ -217,7 +217,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
if (mode & BLAS_XDOUBLE){
/* COMPLEX / Extended Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG,
+ xdouble *, BLASLONG, xdouble *, BLASLONG,
xdouble *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -231,7 +231,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
if (mode & BLAS_DOUBLE){
/* COMPLEX / Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double,
- double *, BLASLONG, double *, BLASLONG,
+ double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -243,7 +243,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
} else {
/* COMPLEX / Single */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, float,
- float *, BLASLONG, float *, BLASLONG,
+ float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -274,11 +274,11 @@ static int blas_thread_server(void *arg){
#ifdef TIMING_DEBUG
unsigned long start, stop;
#endif
-
+
#if defined(OS_LINUX) && !defined(NO_AFFINITY)
if (!increased_threads)
thread_status[cpu].node = gotoblas_set_affinity(cpu + 1);
- else
+ else
thread_status[cpu].node = gotoblas_set_affinity(-1);
#endif
@@ -291,7 +291,7 @@ static int blas_thread_server(void *arg){
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Thread has just been spawned!\n", cpu);
#endif
-
+
while (1){
#ifdef MONITOR
@@ -303,34 +303,34 @@ static int blas_thread_server(void *arg){
#endif
last_tick = (unsigned int)rpcc();
-
+
while (!thread_status[cpu].queue) {
-
+
YIELDING;
if ((unsigned int)rpcc() - last_tick > thread_timeout) {
-
+
pthread_mutex_lock (&thread_status[cpu].lock);
-
+
if (!thread_status[cpu].queue) {
thread_status[cpu].status = THREAD_STATUS_SLEEP;
while (thread_status[cpu].status == THREAD_STATUS_SLEEP) {
-
+
#ifdef MONITOR
main_status[cpu] = MAIN_SLEEPING;
#endif
-
+
pthread_cond_wait(&thread_status[cpu].wakeup, &thread_status[cpu].lock);
}
}
-
+
pthread_mutex_unlock(&thread_status[cpu].lock);
-
+
last_tick = (unsigned int)rpcc();
}
-
+
}
-
+
queue = thread_status[cpu].queue;
if ((long)queue == -1) break;
@@ -345,19 +345,19 @@ static int blas_thread_server(void *arg){
if (queue) {
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
-
+
thread_status[cpu].queue = (blas_queue_t *)1;
sa = queue -> sa;
sb = queue -> sb;
-
+
#ifdef SMP_DEBUG
if (queue -> args) {
fprintf(STDERR, "Server[%2ld] Calculation started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
}
#endif
-
+
#ifdef CONSISTENT_FPCSR
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
@@ -366,21 +366,21 @@ static int blas_thread_server(void *arg){
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1;
#endif
-
+
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
if (sb == NULL) {
if (!(queue -> mode & BLAS_COMPLEX)){
#ifdef EXPRECISION
if (queue -> mode & BLAS_XDOUBLE){
- sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble)
+ sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
- } else
+ } else
#endif
if (queue -> mode & BLAS_DOUBLE){
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-
+
} else {
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
@@ -402,7 +402,7 @@ static int blas_thread_server(void *arg){
}
queue->sb=sb;
}
-
+
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING2;
#endif
@@ -423,24 +423,24 @@ static int blas_thread_server(void *arg){
#ifdef MONITOR
main_status[cpu] = MAIN_FINISH;
#endif
-
+
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
WMB;
}
-
+
#ifdef MONITOR
main_status[cpu] = MAIN_DONE;
#endif
#ifdef TIMING_DEBUG
stop = rpcc();
-
+
fprintf(STDERR, "Thread[%ld] : %16lu %16lu (%8lu cycles)\n", cpu + 1,
start, stop,
stop - start);
#endif
-
+
}
/* Shutdown procedure */
@@ -508,7 +508,7 @@ static int blas_monitor(void *arg){
}
sleep(1);
}
-
+
return 0;
}
#endif
@@ -522,13 +522,13 @@ int blas_thread_init(void){
#endif
if (blas_server_avail) return 0;
-
+
#ifdef NEED_STACKATTR
pthread_attr_init(&attr);
pthread_attr_setguardsize(&attr, 0x1000U);
pthread_attr_setstacksize( &attr, 0x1000U);
#endif
-
+
LOCK_COMMAND(&server_lock);
if (!blas_server_avail){
@@ -551,21 +551,21 @@ int blas_thread_init(void){
thread_timeout = (1 << thread_timeout);
}
}
-
+
for(i = 0; i < blas_num_threads - 1; i++){
thread_status[i].queue = (blas_queue_t *)NULL;
thread_status[i].status = THREAD_STATUS_WAKEUP;
-
+
pthread_mutex_init(&thread_status[i].lock, NULL);
pthread_cond_init (&thread_status[i].wakeup, NULL);
-
+
#ifdef NEED_STACKATTR
- ret=pthread_create(&blas_threads[i], &attr,
+ ret=pthread_create(&blas_threads[i], &attr,
(void *)&blas_thread_server, (void *)i);
#else
- ret=pthread_create(&blas_threads[i], NULL,
+ ret=pthread_create(&blas_threads[i], NULL,
(void *)&blas_thread_server, (void *)i);
#endif
if(ret!=0){
@@ -575,7 +575,7 @@ int blas_thread_init(void){
}
#ifdef MONITOR
- pthread_create(&monitor_thread, NULL,
+ pthread_create(&monitor_thread, NULL,
(void *)&blas_monitor, (void *)NULL);
#endif
@@ -587,7 +587,7 @@ int blas_thread_init(void){
return 0;
}
-/*
+/*
User can call one of two routines.
exec_blas_async ... immediately returns after jobs are queued.
@@ -613,13 +613,13 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
#ifdef SMP_DEBUG
int exec_count = 0;
fprintf(STDERR, "Exec_blas_async is called. Position = %d\n", pos);
-#endif
-
+#endif
+
blas_lock(&exec_queue_lock);
while (queue) {
queue -> position = pos;
-
+
#ifdef CONSISTENT_FPCSR
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue -> x87_mode));
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue -> sse_mode));
@@ -633,7 +633,7 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
do {
while((thread_status[i].node != node || thread_status[i].queue) && (i < blas_num_threads - 1)) i ++;
-
+
if (i < blas_num_threads - 1) break;
i ++;
@@ -657,40 +657,40 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
if (i >= blas_num_threads - 1) i = 0;
}
#endif
-
+
queue -> assigned = i;
WMB;
thread_status[i].queue = queue;
WMB;
-
+
queue = queue -> next;
pos ++;
#ifdef SMP_DEBUG
exec_count ++;
#endif
-
+
}
blas_unlock(&exec_queue_lock);
#ifdef SMP_DEBUG
fprintf(STDERR, "Done(Number of threads = %2ld).\n", exec_count);
-#endif
-
+#endif
+
while (current) {
-
+
pos = current -> assigned;
-
+
if ((BLASULONG)thread_status[pos].queue > 1) {
-
+
if (thread_status[pos].status == THREAD_STATUS_SLEEP) {
-
+
pthread_mutex_lock (&thread_status[pos].lock);
-
+
#ifdef MONITOR
num_suspend ++;
#endif
-
+
if (thread_status[pos].status == THREAD_STATUS_SLEEP) {
thread_status[pos].status = THREAD_STATUS_WAKEUP;
pthread_cond_signal(&thread_status[pos].wakeup);
@@ -698,7 +698,7 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
pthread_mutex_unlock(&thread_status[pos].lock);
}
}
-
+
current = current -> next;
}
@@ -708,11 +708,11 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
while ((num > 0) && queue) {
-
+
while(thread_status[queue -> assigned].queue) {
YIELDING;
};
-
+
queue = queue -> next;
num --;
}
@@ -720,7 +720,7 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
#ifdef SMP_DEBUG
fprintf(STDERR, "Done.\n\n");
#endif
-
+
return 0;
}
@@ -738,31 +738,31 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
#endif
if ((num <= 0) || (queue == NULL)) return 0;
-
+
#ifdef SMP_DEBUG
fprintf(STDERR, "Exec_blas is called. Number of executing threads : %ld\n", num);
-#endif
+#endif
#ifdef __ELF__
if (omp_in_parallel && (num > 1)) {
if (omp_in_parallel() > 0) {
- fprintf(stderr,
+ fprintf(stderr,
"OpenBLAS Warning : Detect OpenMP Loop and this application may hang. "
"Please rebuild the library with USE_OPENMP=1 option.\n");
}
}
#endif
-
+
if ((num > 1) && queue -> next) exec_blas_async(1, queue -> next);
#ifdef TIMING_DEBUG
start = rpcc();
-
+
fprintf(STDERR, "\n");
#endif
-
+
routine = queue -> routine;
-
+
if (queue -> mode & BLAS_LEGACY) {
legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
} else
@@ -772,19 +772,19 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
} else
(routine)(queue -> args, queue -> range_m, queue -> range_n,
queue -> sa, queue -> sb, 0);
-
+
#ifdef TIMING_DEBUG
stop = rpcc();
#endif
-
+
if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);
-
+
#ifdef TIMING_DEBUG
- fprintf(STDERR, "Thread[0] : %16lu %16lu (%8lu cycles)\n",
+ fprintf(STDERR, "Thread[0] : %16lu %16lu (%8lu cycles)\n",
start, stop,
stop - start);
#endif
-
+
return 0;
}
@@ -798,7 +798,7 @@ void goto_set_num_threads(int num_threads) {
if (num_threads == 1) {
if (blas_cpu_number == 1){
//OpenBLAS is already single thread.
- return;
+ return;
}else{
//From multi-threads to single thread
//Restore the original affinity mask
@@ -812,26 +812,26 @@ void goto_set_num_threads(int num_threads) {
if (num_threads > blas_num_threads) {
LOCK_COMMAND(&server_lock);
-
+
increased_threads = 1;
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
-
+
thread_status[i].queue = (blas_queue_t *)NULL;
thread_status[i].status = THREAD_STATUS_WAKEUP;
-
+
pthread_mutex_init(&thread_status[i].lock, NULL);
pthread_cond_init (&thread_status[i].wakeup, NULL);
-
+
#ifdef NEED_STACKATTR
- pthread_create(&blas_threads[i], &attr,
+ pthread_create(&blas_threads[i], &attr,
(void *)&blas_thread_server, (void *)i);
#else
- pthread_create(&blas_threads[i], NULL,
+ pthread_create(&blas_threads[i], NULL,
(void *)&blas_thread_server, (void *)i);
#endif
}
-
+
blas_num_threads = num_threads;
UNLOCK_COMMAND(&server_lock);
@@ -846,7 +846,7 @@ void goto_set_num_threads(int num_threads) {
blas_cpu_number = num_threads;
-#if defined(ARCH_MIPS64)
+#if defined(ARCH_MIPS64)
//set parameters for different number of threads.
blas_set_parameter();
#endif
@@ -855,7 +855,7 @@ void goto_set_num_threads(int num_threads) {
void openblas_set_num_threads(int num_threads) {
goto_set_num_threads(num_threads);
-
+
}
/* Compatible function with pthread_create / join */
@@ -887,11 +887,11 @@ int gotoblas_pthread(int numthreads, void *function, void *args, int stride) {
args += stride;
}
-
+
queue[numthreads - 1].next = NULL;
-
+
exec_blas(numthreads, queue);
-
+
return 0;
}
@@ -903,17 +903,17 @@ int BLASFUNC(blas_thread_shutdown)(void){
int i;
if (!blas_server_avail) return 0;
-
+
LOCK_COMMAND(&server_lock);
for (i = 0; i < blas_num_threads - 1; i++) {
blas_lock(&exec_queue_lock);
-
+
thread_status[i].queue = (blas_queue_t *)-1;
blas_unlock(&exec_queue_lock);
-
+
pthread_mutex_lock (&thread_status[i].lock);
thread_status[i].status = THREAD_STATUS_WAKEUP;
@@ -931,16 +931,16 @@ int BLASFUNC(blas_thread_shutdown)(void){
for(i = 0; i < blas_num_threads - 1; i++){
pthread_mutex_destroy(&thread_status[i].lock);
pthread_cond_destroy (&thread_status[i].wakeup);
- }
+ }
#ifdef NEED_STACKATTR
pthread_attr_destory(&attr);
#endif
blas_server_avail = 0;
-
+
UNLOCK_COMMAND(&server_lock);
-
+
return 0;
}
diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c
index 0a484f3e4..8d62a8125 100644
--- a/driver/others/blas_server_omp.c
+++ b/driver/others/blas_server_omp.c
@@ -79,7 +79,7 @@ void goto_set_num_threads(int num_threads) {
blas_thread_buffer[i]=NULL;
}
}
-#if defined(ARCH_MIPS64)
+#if defined(ARCH_MIPS64)
//set parameters for different number of threads.
blas_set_parameter();
#endif
@@ -128,8 +128,8 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
#ifdef EXPRECISION
if (mode & BLAS_XDOUBLE){
/* REAL / Extended Double */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble,
+ xdouble *, BLASLONG, xdouble *, BLASLONG,
xdouble *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -137,14 +137,14 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> a, args -> lda,
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
- } else
+ } else
#endif
if (mode & BLAS_DOUBLE){
/* REAL / Double */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
- double *, BLASLONG, double *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
+ double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *) = func;
-
+
afunc(args -> m, args -> n, args -> k,
((double *)args -> alpha)[0],
args -> a, args -> lda,
@@ -152,10 +152,10 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> c, args -> ldc, sb);
} else {
/* REAL / Single */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
- float *, BLASLONG, float *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
+ float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *) = func;
-
+
afunc(args -> m, args -> n, args -> k,
((float *)args -> alpha)[0],
args -> a, args -> lda,
@@ -167,7 +167,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
if (mode & BLAS_XDOUBLE){
/* COMPLEX / Extended Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG,
+ xdouble *, BLASLONG, xdouble *, BLASLONG,
xdouble *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -181,7 +181,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
if (mode & BLAS_DOUBLE){
/* COMPLEX / Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double,
- double *, BLASLONG, double *, BLASLONG,
+ double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -193,7 +193,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
} else {
/* COMPLEX / Single */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, float,
- float *, BLASLONG, float *, BLASLONG,
+ float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -210,7 +210,7 @@ static void exec_threads(blas_queue_t *queue){
void *buffer, *sa, *sb;
int pos=0, release_flag=0;
-
+
buffer = NULL;
sa = queue -> sa;
sb = queue -> sb;
@@ -235,19 +235,19 @@ static void exec_threads(blas_queue_t *queue){
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
queue->sa=sa;
}
-
+
if (sb == NULL) {
if (!(queue -> mode & BLAS_COMPLEX)){
#ifdef EXPRECISION
if (queue -> mode & BLAS_XDOUBLE){
- sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble)
+ sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
- } else
+ } else
#endif
if (queue -> mode & BLAS_DOUBLE){
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-
+
} else {
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c
index 100ca34f7..081bdd7d4 100644
--- a/driver/others/blas_server_win32.c
+++ b/driver/others/blas_server_win32.c
@@ -71,8 +71,8 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
#ifdef EXPRECISION
if (mode & BLAS_XDOUBLE){
/* REAL / Extended Double */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble,
+ xdouble *, BLASLONG, xdouble *, BLASLONG,
xdouble *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -80,14 +80,14 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> a, args -> lda,
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
- } else
+ } else
#endif
if (mode & BLAS_DOUBLE){
/* REAL / Double */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
- double *, BLASLONG, double *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
+ double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *) = func;
-
+
afunc(args -> m, args -> n, args -> k,
((double *)args -> alpha)[0],
args -> a, args -> lda,
@@ -95,10 +95,10 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> c, args -> ldc, sb);
} else {
/* REAL / Single */
- void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
- float *, BLASLONG, float *, BLASLONG,
+ void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
+ float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *) = func;
-
+
afunc(args -> m, args -> n, args -> k,
((float *)args -> alpha)[0],
args -> a, args -> lda,
@@ -110,7 +110,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
if (mode & BLAS_XDOUBLE){
/* COMPLEX / Extended Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG,
+ xdouble *, BLASLONG, xdouble *, BLASLONG,
xdouble *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -124,7 +124,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
if (mode & BLAS_DOUBLE){
/* COMPLEX / Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double,
- double *, BLASLONG, double *, BLASLONG,
+ double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -136,7 +136,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
} else {
/* COMPLEX / Single */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, float,
- float *, BLASLONG, float *, BLASLONG,
+ float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
@@ -163,47 +163,47 @@ static DWORD WINAPI blas_thread_server(void *arg){
blas_queue_t *queue;
DWORD action;
HANDLE handles[] = {pool.filled, pool.killed};
-
+
/* Each server needs each buffer */
buffer = blas_memory_alloc(2);
-
+
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Thread is started!\n", cpu);
#endif
-
+
while (1){
-
+
/* Waiting for Queue */
-
+
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Waiting for Queue.\n", cpu);
#endif
-
+
do {
action = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
} while ((action != WAIT_OBJECT_0) && (action != WAIT_OBJECT_0 + 1));
-
+
if (action == WAIT_OBJECT_0 + 1) break;
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Got it.\n", cpu);
#endif
-
+
EnterCriticalSection(&pool.lock);
-
+
queue = pool.queue;
if (queue) pool.queue = queue->next;
-
+
LeaveCriticalSection(&pool.lock);
-
+
if (queue) {
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
-
+
if (pool.queue) SetEvent(pool.filled);
-
+
sa = queue -> sa;
sb = queue -> sb;
-
+
#ifdef CONSISTENT_FPCSR
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
@@ -213,27 +213,27 @@ static DWORD WINAPI blas_thread_server(void *arg){
fprintf(STDERR, "Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
#endif
-
+
// fprintf(stderr, "queue start[%ld]!!!\n", cpu);
-
+
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1;
#endif
-
+
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
-
+
if (sb == NULL) {
if (!(queue -> mode & BLAS_COMPLEX)){
#ifdef EXPRECISION
if (queue -> mode & BLAS_XDOUBLE){
- sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
+ sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
- } else
+ } else
#endif
if (queue -> mode & BLAS_DOUBLE){
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
-
+
} else {
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
@@ -255,11 +255,11 @@ static DWORD WINAPI blas_thread_server(void *arg){
}
queue->sb=sb;
}
-
+
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING2;
#endif
-
+
if (!(queue -> mode & BLAS_LEGACY)) {
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
@@ -269,28 +269,28 @@ static DWORD WINAPI blas_thread_server(void *arg){
}else{
continue; //if queue == NULL
}
-
+
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Finished!\n", cpu);
#endif
-
+
EnterCriticalSection(&queue->lock);
-
+
queue -> status = BLAS_STATUS_FINISHED;
-
+
LeaveCriticalSection(&queue->lock);
-
+
SetEvent(queue->finish);
}
-
+
/* Shutdown procedure */
-
+
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Shutdown!\n", cpu);
#endif
-
+
blas_memory_free(buffer);
-
+
return 0;
}
@@ -299,11 +299,11 @@ int blas_thread_init(void){
BLASLONG i;
if (blas_server_avail || (blas_cpu_number <= 1)) return 0;
-
+
LOCK_COMMAND(&server_lock);
#ifdef SMP_DEBUG
- fprintf(STDERR, "Initializing Thread(Num. threads = %d)\n",
+ fprintf(STDERR, "Initializing Thread(Num. threads = %d)\n",
blas_cpu_number);
#endif
@@ -317,11 +317,11 @@ int blas_thread_init(void){
pool.queue = NULL;
for(i = 0; i < blas_cpu_number - 1; i++){
- blas_threads[i] = CreateThread(NULL, 0,
+ blas_threads[i] = CreateThread(NULL, 0,
blas_thread_server, (void *)i,
0, &blas_threads_id[i]);
}
-
+
blas_server_avail = 1;
}
@@ -330,7 +330,7 @@ int blas_thread_init(void){
return 0;
}
-/*
+/*
User can call one of two routines.
exec_blas_async ... immediately returns after jobs are queued.
@@ -387,7 +387,7 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
#endif
WaitForSingleObject(queue->finish, INFINITE);
-
+
CloseHandle(queue->finish);
DeleteCriticalSection(&queue -> lock);
@@ -414,7 +414,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
if ((num > 1) && queue -> next) exec_blas_async(1, queue -> next);
routine = queue -> routine;
-
+
if (!(queue -> mode & BLAS_LEGACY)) {
(routine)(queue -> args, queue -> range_m, queue -> range_n,
queue -> sa, queue -> sb, 0);
@@ -435,7 +435,7 @@ int BLASFUNC(blas_thread_shutdown)(void){
int i;
if (!blas_server_avail) return 0;
-
+
LOCK_COMMAND(&server_lock);
if (blas_server_avail){
@@ -446,12 +446,12 @@ int BLASFUNC(blas_thread_shutdown)(void){
WaitForSingleObject(blas_threads[i], 5); //INFINITE);
TerminateThread(blas_threads[i],0);
}
-
+
blas_server_avail = 0;
}
-
+
UNLOCK_COMMAND(&server_lock);
-
+
return 0;
}
@@ -466,7 +466,7 @@ void goto_set_num_threads(int num_threads)
if (num_threads > blas_num_threads) {
LOCK_COMMAND(&server_lock);
-
+
//increased_threads = 1;
if (!blas_server_avail){
@@ -478,14 +478,14 @@ void goto_set_num_threads(int num_threads)
pool.queue = NULL;
blas_server_avail = 1;
}
-
- for(i = blas_num_threads - 1; i < num_threads - 1; i++){
-
- blas_threads[i] = CreateThread(NULL, 0,
+
+ for(i = blas_num_threads - 1; i < num_threads - 1; i++){
+
+ blas_threads[i] = CreateThread(NULL, 0,
blas_thread_server, (void *)i,
0, &blas_threads_id[i]);
}
-
+
blas_num_threads = num_threads;
UNLOCK_COMMAND(&server_lock);
diff --git a/driver/others/divtable.c b/driver/others/divtable.c
index d50b091df..d801afb9b 100644
--- a/driver/others/divtable.c
+++ b/driver/others/divtable.c
@@ -41,23 +41,23 @@
#ifdef SMP
#if !defined(USE64BITINT) || defined(ARCH_X86)
unsigned int blas_quick_divide_table[] = {
- 0x00000000, 0x00000001, 0x80000001, 0x55555556,
- 0x40000001, 0x33333334, 0x2aaaaaab, 0x24924925,
- 0x20000001, 0x1c71c71d, 0x1999999a, 0x1745d175,
- 0x15555556, 0x13b13b14, 0x12492493, 0x11111112,
- 0x10000001, 0x0f0f0f10, 0x0e38e38f, 0x0d79435f,
- 0x0ccccccd, 0x0c30c30d, 0x0ba2e8bb, 0x0b21642d,
- 0x0aaaaaab, 0x0a3d70a4, 0x09d89d8a, 0x097b425f,
- 0x0924924a, 0x08d3dcb1, 0x08888889, 0x08421085,
- 0x08000001, 0x07c1f07d, 0x07878788, 0x07507508,
- 0x071c71c8, 0x06eb3e46, 0x06bca1b0, 0x06906907,
- 0x06666667, 0x063e7064, 0x06186187, 0x05f417d1,
- 0x05d1745e, 0x05b05b06, 0x0590b217, 0x0572620b,
- 0x05555556, 0x0539782a, 0x051eb852, 0x05050506,
- 0x04ec4ec5, 0x04d4873f, 0x04bda130, 0x04a7904b,
- 0x04924925, 0x047dc120, 0x0469ee59, 0x0456c798,
- 0x04444445, 0x04325c54, 0x04210843, 0x04104105,
- 0x04000001,
+ 0x00000000, 0x00000001, 0x80000001, 0x55555556,
+ 0x40000001, 0x33333334, 0x2aaaaaab, 0x24924925,
+ 0x20000001, 0x1c71c71d, 0x1999999a, 0x1745d175,
+ 0x15555556, 0x13b13b14, 0x12492493, 0x11111112,
+ 0x10000001, 0x0f0f0f10, 0x0e38e38f, 0x0d79435f,
+ 0x0ccccccd, 0x0c30c30d, 0x0ba2e8bb, 0x0b21642d,
+ 0x0aaaaaab, 0x0a3d70a4, 0x09d89d8a, 0x097b425f,
+ 0x0924924a, 0x08d3dcb1, 0x08888889, 0x08421085,
+ 0x08000001, 0x07c1f07d, 0x07878788, 0x07507508,
+ 0x071c71c8, 0x06eb3e46, 0x06bca1b0, 0x06906907,
+ 0x06666667, 0x063e7064, 0x06186187, 0x05f417d1,
+ 0x05d1745e, 0x05b05b06, 0x0590b217, 0x0572620b,
+ 0x05555556, 0x0539782a, 0x051eb852, 0x05050506,
+ 0x04ec4ec5, 0x04d4873f, 0x04bda130, 0x04a7904b,
+ 0x04924925, 0x047dc120, 0x0469ee59, 0x0456c798,
+ 0x04444445, 0x04325c54, 0x04210843, 0x04104105,
+ 0x04000001,
};
#else
BLASULONG blas_quick_divide_table[] = {
diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c
index ec421d6de..a044343e5 100644
--- a/driver/others/dynamic.c
+++ b/driver/others/dynamic.c
@@ -95,7 +95,7 @@ int support_avx(){
#ifndef NO_AVX
int eax, ebx, ecx, edx;
int ret=0;
-
+
cpuid(1, &eax, &ebx, &ecx, &edx);
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
xgetbv(0, &eax, &edx);
@@ -179,7 +179,7 @@ static gotoblas_t *get_coretype(void){
// Pentium (Clarkdale) / Pentium Mobile (Arrandale)
// Xeon (Clarkdale), 32nm
if (model == 5) return &gotoblas_NEHALEM;
-
+
//Intel Xeon Processor 5600 (Westmere-EP)
//Xeon Processor E7 (Westmere-EX)
//Xeon E7540
@@ -250,7 +250,7 @@ static gotoblas_t *get_coretype(void){
}
if (family == 0xf){
if ((exfamily == 0) || (exfamily == 2)) {
- if (ecx & (1 << 0)) return &gotoblas_OPTERON_SSE3;
+ if (ecx & (1 << 0)) return &gotoblas_OPTERON_SSE3;
else return &gotoblas_OPTERON;
} else if (exfamily == 5) {
return &gotoblas_BOBCAT;
@@ -285,7 +285,7 @@ static gotoblas_t *get_coretype(void){
break;
}
}
-
+
return NULL;
}
@@ -326,7 +326,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_DUNNINGTON) return corename[ 9];
if (gotoblas == &gotoblas_NEHALEM) return corename[10];
if (gotoblas == &gotoblas_ATHLON) return corename[11];
- if (gotoblas == &gotoblas_OPTERON_SSE3) return corename[12];
+ if (gotoblas == &gotoblas_OPTERON_SSE3) return corename[12];
if (gotoblas == &gotoblas_OPTERON) return corename[13];
if (gotoblas == &gotoblas_BARCELONA) return corename[14];
if (gotoblas == &gotoblas_NANO) return corename[15];
@@ -359,7 +359,7 @@ static gotoblas_t *force_coretype(char *coretype){
{
strncpy(mname,coretype,20);
sprintf(message, "Core not found: %s\n",mname);
- openblas_warning(1, message);
+ openblas_warning(1, message);
return(NULL);
}
@@ -390,16 +390,16 @@ static gotoblas_t *force_coretype(char *coretype){
return(NULL);
}
-
-
-
+
+
+
void gotoblas_dynamic_init(void) {
-
+
char coremsg[128];
char coren[22];
char *p;
-
+
if (gotoblas) return;
@@ -412,7 +412,7 @@ void gotoblas_dynamic_init(void) {
{
gotoblas = get_coretype();
}
-
+
#ifdef ARCH_X86
if (gotoblas == NULL) gotoblas = &gotoblas_KATMAI;
#else
@@ -427,21 +427,21 @@ void gotoblas_dynamic_init(void) {
gotoblas = &gotoblas_PRESCOTT;
}
#endif
-
+
if (gotoblas && gotoblas -> init) {
strncpy(coren,gotoblas_corename(),20);
sprintf(coremsg, "Core: %s\n",coren);
- openblas_warning(2, coremsg);
+ openblas_warning(2, coremsg);
gotoblas -> init();
} else {
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
exit(1);
}
-
+
}
void gotoblas_dynamic_quit(void) {
-
+
gotoblas = NULL;
}
diff --git a/driver/others/init.c b/driver/others/init.c
index cbcf229fa..9c7524909 100644
--- a/driver/others/init.c
+++ b/driver/others/init.c
@@ -13,19 +13,19 @@ met:
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the ISCAS nor the names of its contributors may
- be used to endorse or promote products derived from this software
+ 3. Neither the name of the ISCAS nor the names of its contributors may
+ be used to endorse or promote products derived from this software
without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
@@ -181,8 +181,8 @@ static inline int rcount(unsigned long number) {
}
/***
- Known issue: The number of CPUs/cores should less
- than sizeof(unsigned long). On 64 bits, the limit
+ Known issue: The number of CPUs/cores should less
+ than sizeof(unsigned long). On 64 bits, the limit
is 64. On 32 bits, it is 32.
***/
static inline void get_cpumap(int node, unsigned long * node_info) {
@@ -197,14 +197,14 @@ static inline void get_cpumap(int node, unsigned long * node_info) {
int k=0;
sprintf(name, CPUMAP_NAME, node);
-
+
infile = open(name, O_RDONLY);
for(i=0; i<32; i++){
affinity[i] = 0;
}
if (infile != -1) {
-
+
read(infile, cpumap, sizeof(cpumap));
for(i=0; i<160; i++){
@@ -212,7 +212,7 @@ static inline void get_cpumap(int node, unsigned long * node_info) {
break;
if(cpumap[i] != ','){
name[k++]=cpumap[i];
-
+
//Enough data for Hex
if(k >= NCPUBITS/4){
affinity[count++] = strtoul(name, &dummy, 16);
@@ -249,7 +249,7 @@ static inline void get_share(int cpu, int level, unsigned long * share) {
int bitmask_idx = 0;
sprintf(name, SHARE_NAME, cpu, level);
-
+
infile = open(name, O_RDONLY);
// Init share
@@ -260,7 +260,7 @@ static inline void get_share(int cpu, int level, unsigned long * share) {
share[bitmask_idx] = CPUMASK(cpu);
if (infile != -1) {
-
+
read(infile, cpumap, sizeof(cpumap));
for(i=0; i<160; i++){
@@ -268,8 +268,8 @@ static inline void get_share(int cpu, int level, unsigned long * share) {
break;
if(cpumap[i] != ','){
name[k++]=cpumap[i];
-
- //Enough data
+
+ //Enough data
if(k >= NCPUBITS/4){
affinity[count++] = strtoul(name, &dummy, 16);
k=0;
@@ -287,8 +287,8 @@ static inline void get_share(int cpu, int level, unsigned long * share) {
for(i=0; i<count && i<MAX_BITMASK_LEN; i++){
share[i]=affinity[count-i-1];
}
-
-
+
+
close(infile);
}
@@ -369,7 +369,7 @@ static void numa_mapping(void) {
#ifdef DEBUG
fprintf(stderr, "\nFrom /sys ...\n\n");
- for (cpu = 0; cpu < count; cpu++)
+ for (cpu = 0; cpu < count; cpu++)
fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
#endif
@@ -406,7 +406,7 @@ static void numa_mapping(void) {
#ifdef DEBUG
fprintf(stderr, "\nSorting ...\n\n");
- for (cpu = 0; cpu < count; cpu++)
+ for (cpu = 0; cpu < count; cpu++)
fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
#endif
@@ -453,12 +453,12 @@ static void disable_hyperthread(void) {
share[i] &= common->avail[i];
if (popcount(share[i]) > 1) {
-
+
#ifdef DEBUG
fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
cpu, share[i] & ~(CPUMASK(cpu)));
#endif
-
+
common -> avail[i] &= ~((share[i] & ~ CPUMASK(cpu)));
}
}
@@ -514,7 +514,7 @@ static void setup_mempolicy(void) {
for (cpu = 0; cpu < numprocs; cpu ++) {
mynode = READ_NODE(common -> cpu_info[cpu_sub_mapping[cpu]]);
-
+
lnodemask |= (1UL << mynode);
node_cpu[mynode] ++;
@@ -527,11 +527,11 @@ static void setup_mempolicy(void) {
for (cpu = 0; cpu < MAX_NODES; cpu ++) if ((node_cpu[cpu] != 0) && (node_cpu[cpu] != maxcpu)) node_equal = 0;
if (lnodemask) {
-
+
#ifdef DEBUG
fprintf(stderr, "Node mask = %lx\n", lnodemask);
#endif
-
+
my_set_mempolicy(MPOL_INTERLEAVE, &lnodemask, sizeof(lnodemask) * 8);
numnodes = popcount(lnodemask);
@@ -551,11 +551,11 @@ static void open_shmem(void) {
do {
shmid = shmget(SH_MAGIC, 4096, 0666);
-
+
if (shmid == -1) {
shmid = shmget(SH_MAGIC, 4096, IPC_CREAT | 0666);
}
-
+
try ++;
} while ((try < 10) && (shmid == -1));
@@ -599,7 +599,7 @@ static void local_cpu_map(void) {
if (id > 0) {
if (is_dead(id)) common -> cpu_use[cpu] = 0;
}
-
+
bitmask_idx = CPUELT(cpu);
if ((common -> cpu_use[cpu] == 0) && (lprocmask[bitmask_idx] & CPUMASK(cpu))) {
@@ -611,9 +611,9 @@ static void local_cpu_map(void) {
}
cpu ++;
-
+
} while ((mapping < numprocs) && (cpu < common -> final_num_procs));
-
+
disable_mapping = 0;
if ((mapping < numprocs) || (numprocs == 1)) {
@@ -622,7 +622,7 @@ static void local_cpu_map(void) {
}
disable_mapping = 1;
}
-
+
#ifdef DEBUG
for (cpu = 0; cpu < numprocs; cpu ++) {
fprintf(stderr, "Local Mapping : %2d --> %2d (%2d)\n", cpu, cpu_mapping[cpu], cpu_sub_mapping[cpu]);
@@ -634,14 +634,14 @@ static void local_cpu_map(void) {
int get_num_procs(void) { return numprocs; }
int get_num_nodes(void) { return numnodes; }
-int get_node_equal(void) {
+int get_node_equal(void) {
return (((blas_cpu_number % numnodes) == 0) && node_equal);
-
+
}
int gotoblas_set_affinity(int pos) {
-
+
cpu_set_t cpu_mask;
int mynode = 1;
@@ -662,7 +662,7 @@ int gotoblas_set_affinity(int pos) {
CPU_ZERO(&cpu_mask);
CPU_SET (cpu_mapping[pos], &cpu_mask);
-
+
sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask);
node_mapping[WhereAmI()] = mynode;
@@ -672,7 +672,7 @@ int gotoblas_set_affinity(int pos) {
return mynode;
}
-int get_node(void) {
+int get_node(void) {
if (!disable_mapping) return node_mapping[WhereAmI()];
@@ -694,7 +694,7 @@ void gotoblas_affinity_init(void) {
initialized = 1;
sched_getaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
-
+
#ifdef USE_OPENMP
numprocs = 0;
#else
@@ -746,9 +746,9 @@ void gotoblas_affinity_init(void) {
}
for (cpu = 0; cpu < common -> num_procs; cpu++) common -> cpu_info[cpu] = cpu;
-
+
numa_check();
-
+
disable_hyperthread();
if (common -> num_nodes > 1) numa_mapping();
@@ -786,7 +786,7 @@ void gotoblas_affinity_init(void) {
CPU_ZERO(&cpu_mask);
CPU_SET (cpu_mapping[0], &cpu_mask);
-
+
sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask);
node_mapping[WhereAmI()] = READ_NODE(common -> cpu_info[cpu_sub_mapping[0]]);
@@ -817,13 +817,13 @@ void gotoblas_affinity_quit(void) {
if ((numprocs == 1) || (initialized == 0)) return;
if (!disable_mapping) {
-
+
blas_lock(&common -> lock);
-
+
for (i = 0; i < numprocs; i ++) common -> cpu_use[cpu_mapping[i]] = -1;
-
+
blas_unlock(&common -> lock);
-
+
}
shmctl(shmid, IPC_STAT, &ds);
diff --git a/driver/others/lamc3.c b/driver/others/lamc3.c
index 439ef6e3d..acc4b505d 100644
--- a/driver/others/lamc3.c
+++ b/driver/others/lamc3.c
@@ -44,7 +44,7 @@ double
FLOAT
#endif
NAME(FLOAT *a, FLOAT *b){
-
+
return *a + *b;
}
diff --git a/driver/others/lamch.c b/driver/others/lamch.c
index b04450024..cdbc0eef5 100644
--- a/driver/others/lamch.c
+++ b/driver/others/lamch.c
@@ -152,7 +152,7 @@ double
FLOAT
#endif
NAME(char *P){
-
+
char p = *P;
int pos;
FLOAT *hdata = (FLOAT *)idata;
diff --git a/driver/others/memory.c b/driver/others/memory.c
index 24a92034d..ba806b3a3 100644
--- a/driver/others/memory.c
+++ b/driver/others/memory.c
@@ -13,19 +13,19 @@ met:
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the ISCAS nor the names of its contributors may
- be used to endorse or promote products derived from this software
+ 3. Neither the name of the ISCAS nor the names of its contributors may
+ be used to endorse or promote products derived from this software
without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
@@ -136,8 +136,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
-#define CONSTRUCTOR __attribute__ ((constructor))
-#define DESTRUCTOR __attribute__ ((destructor))
+#define CONSTRUCTOR __attribute__ ((constructor))
+#define DESTRUCTOR __attribute__ ((destructor))
#ifdef DYNAMIC_ARCH
gotoblas_t *gotoblas = NULL;
@@ -171,32 +171,32 @@ int get_num_procs(void) {
#ifdef OS_WINDOWS
int get_num_procs(void) {
-
+
static int nums = 0;
if (nums == 0) {
SYSTEM_INFO sysinfo;
-
+
GetSystemInfo(&sysinfo);
nums = sysinfo.dwNumberOfProcessors;
}
-
+
return nums;
}
#endif
-#if defined(OS_FREEBSD)
+#if defined(OS_FREEBSD)
int get_num_procs(void) {
-
+
static int nums = 0;
int m[2];
size_t len;
-
+
if (nums == 0) {
m[0] = CTL_HW;
m[1] = HW_NCPU;
@@ -232,7 +232,7 @@ void set_stack_limit(int limitMB){
rl.rlim_cur=StackSize;
result=setrlimit(RLIMIT_STACK, &rl);
if(result !=0){
- fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
+ fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
}
}
}
@@ -241,12 +241,12 @@ void set_stack_limit(int limitMB){
#endif
/*
-OpenBLAS uses the numbers of CPU cores in multithreading.
+OpenBLAS uses the numbers of CPU cores in multithreading.
It can be set by openblas_set_num_threads(int num_threads);
*/
int blas_cpu_number = 0;
/*
-The numbers of threads in the thread pool.
+The numbers of threads in the thread pool.
This value is equal or large than blas_cpu_number. This means some threads are sleep.
*/
int blas_num_threads = 0;
@@ -297,7 +297,7 @@ int blas_get_cpu_number(void){
if (p) blas_goto_num = atoi(p);
if (blas_goto_num < 0) blas_goto_num = 0;
}
-
+
#endif
blas_omp_num = 0;
@@ -318,8 +318,8 @@ int blas_get_cpu_number(void){
#ifdef DEBUG
printf( "Adjusted number of threads : %3d\n", blas_num_threads);
#endif
-
- blas_cpu_number = blas_num_threads;
+
+ blas_cpu_number = blas_num_threads;
return blas_num_threads;
}
@@ -355,12 +355,12 @@ static void *alloc_mmap(void *address){
void *map_address;
if (address){
- map_address = mmap(address,
- BUFFER_SIZE,
+ map_address = mmap(address,
+ BUFFER_SIZE,
MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
} else {
- map_address = mmap(address,
- BUFFER_SIZE,
+ map_address = mmap(address,
+ BUFFER_SIZE,
MMAP_ACCESS, MMAP_POLICY, -1, 0);
}
@@ -387,7 +387,7 @@ static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
BLASULONG original, *p;
BLASULONG start, stop, min;
int iter, i, count;
-
+
min = (BLASULONG)-1;
original = *(BLASULONG *)(address + size - PAGESIZE);
@@ -397,20 +397,20 @@ static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) {
for (iter = 0; iter < BENCH_ITERATION; iter ++ ) {
p = (BLASULONG *)address;
-
+
count = size / PAGESIZE;
-
+
start = rpcc();
-
+
for (i = 0; i < count; i ++) {
p = (BLASULONG *)(*p);
}
-
+
stop = rpcc();
-
+
if (min > stop - start) min = stop - start;
}
-
+
*(BLASULONG *)(address + size - PAGESIZE + 0) = original;
*(BLASULONG *)(address + size - PAGESIZE + 8) = (BLASULONG)p;
@@ -442,11 +442,11 @@ static void *alloc_mmap(void *address){
} else {
#endif
- map_address = mmap(NULL, BUFFER_SIZE * SCALING,
+ map_address = mmap(NULL, BUFFER_SIZE * SCALING,
MMAP_ACCESS, MMAP_POLICY, -1, 0);
-
+
if (map_address != (void *)-1) {
-
+
#ifdef OS_LINUX
#ifdef DEBUG
int ret=0;
@@ -462,45 +462,45 @@ static void *alloc_mmap(void *address){
#endif
#endif
-
+
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
-
+
start = (BLASULONG)map_address;
current = (SCALING - 1) * BUFFER_SIZE;
-
+
while(current > 0) {
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;
start += PAGESIZE;
current -= PAGESIZE;
}
-
+
*(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address;
-
+
start = (BLASULONG)map_address;
-
+
best = (BLASULONG)-1;
best_address = map_address;
-
+
while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) {
-
+
current = run_bench(start, allocsize);
-
+
if (best > current) {
best = current;
best_address = (void *)start;
}
-
+
start += PAGESIZE;
-
+
}
-
+
if ((BLASULONG)best_address > (BLASULONG)map_address)
munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address);
-
+
munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address);
-
+
map_address = best_address;
-
+
#if defined(OS_LINUX) && !defined(NO_WARMUP)
hot_alloc = 2;
#endif
@@ -632,7 +632,7 @@ static void alloc_devicedirver_free(struct release_t *release){
}
static void *alloc_devicedirver(void *address){
-
+
int fd;
void *map_address;
@@ -646,7 +646,7 @@ static void *alloc_devicedirver(void *address){
PROT_READ | PROT_WRITE,
MAP_FILE | MAP_SHARED,
fd, 0);
-
+
if (map_address != (void *)-1) {
release_info[release_pos].address = map_address;
release_info[release_pos].attr = fd;
@@ -671,9 +671,9 @@ static void alloc_shm_free(struct release_t *release){
static void *alloc_shm(void *address){
void *map_address;
int shmid;
-
+
shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600);
-
+
map_address = (void *)shmat(shmid, address, 0);
if (map_address != (void *)-1){
@@ -725,7 +725,7 @@ static void *alloc_hugetlb(void *address){
#if defined(OS_LINUX) || defined(OS_AIX)
int shmid;
-
+
shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,
#ifdef OS_LINUX
SHM_HUGETLB |
@@ -734,10 +734,10 @@ static void *alloc_hugetlb(void *address){
SHM_LGPAGE | SHM_PIN |
#endif
IPC_CREAT | SHM_R | SHM_W);
-
+
if (shmid != -1) {
map_address = (void *)shmat(shmid, address, SHM_RND);
-
+
#ifdef OS_LINUX
my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
#endif
@@ -750,7 +750,7 @@ static void *alloc_hugetlb(void *address){
#ifdef __sun__
struct memcntl_mha mha;
-
+
mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
mha.mha_flags = 0;
mha.mha_pagesize = HUGE_PAGESIZE;
@@ -768,7 +768,7 @@ static void *alloc_hugetlb(void *address){
tp.PrivilegeCount = 1;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-
+
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) return (void *) -1;
if (AdjustTokenPrivileges(hToken, FALSE, (PTOKEN_PRIVILEGES)&tp, 0, NULL, NULL) != TRUE) return (void *) -1;
@@ -781,7 +781,7 @@ static void *alloc_hugetlb(void *address){
AdjustTokenPrivileges(hToken, TRUE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, NULL);
if (map_address == (void *)NULL) map_address = (void *)-1;
-
+
#endif
if (map_address != (void *)-1){
@@ -829,7 +829,7 @@ static void *alloc_hugetlbfile(void *address){
PROT_READ | PROT_WRITE,
MAP_SHARED,
fd, 0);
-
+
if (map_address != (void *)-1) {
release_info[release_pos].address = map_address;
release_info[release_pos].attr = fd;
@@ -882,7 +882,7 @@ static void gotoblas_memory_init(void);
/* 2 : Thread */
void *blas_memory_alloc(int procpos){
-
+
int position;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
int mypos;
@@ -917,11 +917,11 @@ void *blas_memory_alloc(int procpos){
void *(**func)(void *address);
if (!memory_initialized) {
-
+
LOCK_COMMAND(&alloc_lock);
-
+
if (!memory_initialized) {
-
+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
for (position = 0; position < NUM_BUFFERS; position ++){
memory[position].addr = (void *)0;
@@ -930,7 +930,7 @@ void *blas_memory_alloc(int procpos){
memory[position].lock = 0;
}
#endif
-
+
#ifdef DYNAMIC_ARCH
gotoblas_dynamic_init();
#endif
@@ -938,11 +938,11 @@ void *blas_memory_alloc(int procpos){
#if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
gotoblas_affinity_init();
#endif
-
+
#ifdef SMP
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
#endif
-
+
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
#ifndef DYNAMIC_ARCH
blas_set_parameter();
@@ -968,16 +968,16 @@ void *blas_memory_alloc(int procpos){
do {
if (!memory[position].used && (memory[position].pos == mypos)) {
-
+
blas_lock(&memory[position].lock);
-
+
if (!memory[position].used) goto allocation;
-
+
blas_unlock(&memory[position].lock);
}
-
+
position ++;
-
+
} while (position < NUM_BUFFERS);
@@ -987,18 +987,18 @@ void *blas_memory_alloc(int procpos){
do {
if (!memory[position].used) {
-
+
blas_lock(&memory[position].lock);
if (!memory[position].used) goto allocation;
-
+
blas_unlock(&memory[position].lock);
}
-
+
position ++;
-
+
} while (position < NUM_BUFFERS);
-
+
goto error;
allocation :
@@ -1055,13 +1055,13 @@ void *blas_memory_alloc(int procpos){
} while ((BLASLONG)map_address == -1);
- memory[position].addr = map_address;
+ memory[position].addr = map_address;
#ifdef DEBUG
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
#endif
}
-
+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
if (memory[position].pos == -1) memory[position].pos = mypos;
@@ -1071,18 +1071,18 @@ void *blas_memory_alloc(int procpos){
#ifdef DYNAMIC_ARCH
if (memory_initialized == 1) {
-
+
LOCK_COMMAND(&alloc_lock);
-
+
if (memory_initialized == 1) {
-
+
if (!gotoblas) gotoblas_dynamic_init();
-
+
memory_initialized = 2;
}
-
+
UNLOCK_COMMAND(&alloc_lock);
-
+
}
#endif
@@ -1090,8 +1090,8 @@ void *blas_memory_alloc(int procpos){
#ifdef DEBUG
printf("Mapped : %p %3d\n\n",
(void *)memory[position].addr, position);
-#endif
-
+#endif
+
return (void *)memory[position].addr;
error:
@@ -1106,8 +1106,8 @@ void blas_memory_free(void *free_area){
#ifdef DEBUG
printf("Unmapped Start : %p ...\n", free_area);
-#endif
-
+#endif
+
position = 0;
while ((memory[position].addr != free_area)
@@ -1117,21 +1117,21 @@ void blas_memory_free(void *free_area){
#ifdef DEBUG
printf(" Position : %d\n", position);
-#endif
+#endif
memory[position].used = 0;
#ifdef DEBUG
printf("Unmap Succeeded.\n\n");
-#endif
+#endif
return;
-
+
error:
printf("BLAS : Bad memory unallocation! : %4d %p\n", position, free_area);
#ifdef DEBUG
- for (position = 0; position < NUM_BUFFERS; position++)
+ for (position = 0; position < NUM_BUFFERS; position++)
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
#endif
@@ -1151,7 +1151,7 @@ void blas_shutdown(void){
for (pos = 0; pos < release_pos; pos ++) {
release_info[pos].func(&release_info[pos]);
}
-
+
#ifdef SEEK_ADDRESS
base_address = 0UL;
#else
@@ -1173,7 +1173,7 @@ void blas_shutdown(void){
}
#if defined(OS_LINUX) && !defined(NO_WARMUP)
-
+
#ifdef SMP
#if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -1184,7 +1184,7 @@ static BLASULONG init_lock = 0UL;
#endif
#endif
-static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
+static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
void *sa, void *sb, BLASLONG pos) {
#if !defined(ARCH_POWER) && !defined(ARCH_SPARC)
@@ -1247,7 +1247,7 @@ static void _init_thread_memory(void *buffer) {
queue[num_cpu - 1].next = NULL;
queue[0].sa = buffer;
-
+
exec_blas(num_cpu, queue);
}
@@ -1266,15 +1266,15 @@ static void gotoblas_memory_init(void) {
#ifdef SMP_SERVER
if (blas_server_avail == 0) blas_thread_init();
#endif
-
+
_init_thread_memory((void *)((BLASULONG)buffer + GEMM_OFFSET_A));
-
+
#else
-
+
_touch_memory(NULL, NULL, NULL, (void *)((BLASULONG)buffer + GEMM_OFFSET_A), NULL, 0);
-
+
#endif
-
+
blas_memory_free(buffer);
}
#endif
diff --git a/driver/others/memory_qalloc.c b/driver/others/memory_qalloc.c
index 10b35aa31..17b7f5d60 100644
--- a/driver/others/memory_qalloc.c
+++ b/driver/others/memory_qalloc.c
@@ -58,12 +58,12 @@ void *sb = NULL;
static double static_buffer[BUFFER_SIZE/sizeof(double)];
void *blas_memory_alloc(int numproc){
-
+
if (sa == NULL){
#if 1
- sa = (void *)qalloc(QFAST, BUFFER_SIZE);
+ sa = (void *)qalloc(QFAST, BUFFER_SIZE);
#else
- sa = (void *)malloc(BUFFER_SIZE);
+ sa = (void *)malloc(BUFFER_SIZE);
#endif
sb = (void *)&static_buffer[0];
}
diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c
index 581ab1a43..d8da2e398 100644
--- a/driver/others/openblas_get_config.c
+++ b/driver/others/openblas_get_config.c
@@ -13,19 +13,19 @@ met:
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the ISCAS nor the names of its contributors may
- be used to endorse or promote products derived from this software
+ 3. Neither the name of the ISCAS nor the names of its contributors may
+ be used to endorse or promote products derived from this software
without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
diff --git a/driver/others/openblas_get_parallel.c b/driver/others/openblas_get_parallel.c
index 68fe57449..ea2e4d986 100644
--- a/driver/others/openblas_get_parallel.c
+++ b/driver/others/openblas_get_parallel.c
@@ -13,19 +13,19 @@ met:
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the ISCAS nor the names of its contributors may
- be used to endorse or promote products derived from this software
+ 3. Neither the name of the ISCAS nor the names of its contributors may
+ be used to endorse or promote products derived from this software
without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
@@ -33,12 +33,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
#if defined(USE_OPENMP)
-static int parallel = 2 ;
-#elif defined(SMP_SERVER)
-static int parallel = 1;
-#else
-static int parallel = 0;
-#endif
+static int parallel = 2 ;
+#elif defined(SMP_SERVER)
+static int parallel = 1;
+#else
+static int parallel = 0;
+#endif
int CNAME() {
return parallel;
diff --git a/driver/others/openblas_set_num_threads.c b/driver/others/openblas_set_num_threads.c
index 5e24cfcc7..ea0c70a91 100644
--- a/driver/others/openblas_set_num_threads.c
+++ b/driver/others/openblas_set_num_threads.c
@@ -13,19 +13,19 @@ met:
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- 3. Neither the name of the ISCAS nor the names of its contributors may
- be used to endorse or promote products derived from this software
+ 3. Neither the name of the ISCAS nor the names of its contributors may
+ be used to endorse or promote products derived from this software
without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
diff --git a/driver/others/parameter.c b/driver/others/parameter.c
index 58e5fb11d..0d8d1e11a 100644
--- a/driver/others/parameter.c
+++ b/driver/others/parameter.c
@@ -177,26 +177,26 @@ int get_L2_size(void){
int i;
cpuid(2, &eax, &ebx, &ecx, &edx);
-
+
info[ 0] = BITMASK(eax, 8, 0xff);
info[ 1] = BITMASK(eax, 16, 0xff);
info[ 2] = BITMASK(eax, 24, 0xff);
-
+
info[ 3] = BITMASK(ebx, 0, 0xff);
info[ 4] = BITMASK(ebx, 8, 0xff);
info[ 5] = BITMASK(ebx, 16, 0xff);
info[ 6] = BITMASK(ebx, 24, 0xff);
-
+
info[ 7] = BITMASK(ecx, 0, 0xff);
info[ 8] = BITMASK(ecx, 8, 0xff);
info[ 9] = BITMASK(ecx, 16, 0xff);
info[10] = BITMASK(ecx, 24, 0xff);
-
+
info[11] = BITMASK(edx, 0, 0xff);
info[12] = BITMASK(edx, 8, 0xff);
info[13] = BITMASK(edx, 16, 0xff);
info[14] = BITMASK(edx, 24, 0xff);
-
+
for (i = 0; i < 15; i++){
switch (info[i]){
@@ -284,7 +284,7 @@ void blas_set_parameter(void){
#endif
#endif
-#if defined(CORE_NORTHWOOD)
+#if defined(CORE_NORTHWOOD)
size >>= 7;
#ifdef ALLOC_HUGETLB
@@ -414,7 +414,7 @@ void blas_set_parameter(void){
#endif
#endif
-#if defined(CORE_OPTERON)
+#if defined(CORE_OPTERON)
sgemm_p = 224 + 14 * (size >> 5);
dgemm_p = 112 + 14 * (size >> 6);
cgemm_p = 116 + 14 * (size >> 6);
@@ -469,7 +469,7 @@ void blas_set_parameter(void){
factor = atoi(p);
if (factor < 10) factor = 10;
if (factor > 200) factor = 200;
-
+
sgemm_p = ((long)((double)sgemm_p * (double)factor * 1.e-2)) & ~7L;
dgemm_p = ((long)((double)dgemm_p * (double)factor * 1.e-2)) & ~7L;
cgemm_p = ((long)((double)cgemm_p * (double)factor * 1.e-2)) & ~7L;
@@ -479,7 +479,7 @@ void blas_set_parameter(void){
xgemm_p = ((long)((double)xgemm_p * (double)factor * 1.e-2)) & ~7L;
#endif
}
-
+
if (sgemm_p == 0) sgemm_p = 64;
if (dgemm_p == 0) dgemm_p = 64;
if (cgemm_p == 0) cgemm_p = 64;
@@ -572,7 +572,7 @@ int get_current_cpu_info(void){
#if defined(ARCH_IA64)
-static inline BLASULONG cpuid(BLASULONG regnum){
+static inline BLASULONG cpuid(BLASULONG regnum){
BLASULONG value;
#ifndef __ECC
@@ -587,11 +587,11 @@ static inline BLASULONG cpuid(BLASULONG regnum){
#if 1
void blas_set_parameter(void){
-
+
BLASULONG cpuid3, size;
cpuid3 = cpuid(3);
-
+
size = BITMASK(cpuid3, 16, 0xff);
sgemm_p = 192 * (size + 1);
@@ -625,7 +625,7 @@ void blas_set_parameter(void){
#define IA64_PROC_NAME "/proc/pal/cpu0/cache_info"
void blas_set_parameter(void){
-
+
BLASULONG cpuid3;
int size = 0;
@@ -643,17 +643,17 @@ void blas_set_parameter(void){
if (size <= 0) {
if ((infile = fopen(IA64_PROC_NAME, "r")) != NULL) {
-
+
while(fgets(buffer, sizeof(buffer), infile) != NULL) {
if ((!strncmp("Data/Instruction Cache level 3", buffer, 30))) break;
}
-
+
fgets(buffer, sizeof(buffer), infile);
-
+
fclose(infile);
-
+
*strstr(buffer, "bytes") = (char)NULL;
-
+
size = atoi(strchr(buffer, ':') + 1) / 1572864;
}
}
@@ -663,7 +663,7 @@ void blas_set_parameter(void){
if (size <= 0) {
cpuid3 = cpuid(3);
-
+
size = BITMASK(cpuid3, 16, 0xff) + 1;
}
@@ -692,7 +692,7 @@ void blas_set_parameter(void){
#endif
-#if defined(ARCH_MIPS64)
+#if defined(ARCH_MIPS64)
void blas_set_parameter(void){
#if defined(LOONGSON3A)
#ifdef SMP
@@ -720,7 +720,7 @@ void blas_set_parameter(void){
dgemm_r = 160;
}
#endif
-#endif
+#endif
}
#endif
diff --git a/driver/others/profile.c b/driver/others/profile.c
index f464c0b6a..9fca09f06 100644
--- a/driver/others/profile.c
+++ b/driver/others/profile.c
@@ -75,13 +75,13 @@ void gotoblas_profile_quit(void) {
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n");
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n");
-
+
for (i = 0; i < MAX_PROF_TABLE; i ++) {
if (function_profile_table[i].calls) {
#ifndef OS_WINDOWS
- fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n",
+ fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n",
#else
- fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n",
+ fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n",
#endif
func_table[i],
function_profile_table[i].calls,
@@ -94,11 +94,11 @@ void gotoblas_profile_quit(void) {
}
fprintf(stderr, " --------------------------------------------------------------------\n");
-
+
#ifndef OS_WINDOWS
- fprintf(stderr, "%-12s : %10Ld %10.3f%% %8.2f\n",
+ fprintf(stderr, "%-12s : %10Ld %10.3f%% %8.2f\n",
#else
- fprintf(stderr, "%-12s : %10lld %10.3f%% %8.2f\n",
+ fprintf(stderr, "%-12s : %10lld %10.3f%% %8.2f\n",
#endif
"Total",
calls,
diff --git a/driver/others/xerbla.c b/driver/others/xerbla.c
index 6f5170ef1..7427b51c4 100644
--- a/driver/others/xerbla.c
+++ b/driver/others/xerbla.c
@@ -48,7 +48,7 @@
#ifdef __ELF__
int __xerbla(char *message, blasint *info, blasint length){
-
+
printf(" ** On entry to %6s parameter number %2d had an illegal value\n",
message, *info);
@@ -60,7 +60,7 @@ int BLASFUNC(xerbla)(char *, blasint *, blasint) __attribute__ ((weak, alias ("_
#else
int BLASFUNC(xerbla)(char *message, blasint *info, blasint length){
-
+
printf(" ** On entry to %6s parameter number %2d had an illegal value\n",
message, *info);