From 342bbc3871d1b43f548e9d1ae9d380a1d4989cb3 Mon Sep 17 00:00:00 2001 From: Xianyi Zhang Date: Mon, 24 Jan 2011 14:54:24 +0000 Subject: Import GotoBLAS2 1.13 BSD version codes. --- interface/Makefile | 1942 ++++++++++++++++++++++++++++++++++++++++++++++++++++ interface/asum.c | 93 +++ interface/axpy.c | 112 +++ interface/copy.c | 80 +++ interface/create | 22 + interface/dot.c | 101 +++ interface/dsdot.c | 99 +++ interface/gbmv.c | 252 +++++++ interface/gemm.c | 452 ++++++++++++ interface/gemv.c | 237 +++++++ interface/ger.c | 193 ++++++ interface/gesv.c | 154 +++++ interface/getf2.c | 109 +++ interface/getrf.c | 121 ++++ interface/getrs.c | 152 ++++ interface/imax.c | 171 +++++ interface/larf.c | 109 +++ interface/laswp.c | 110 +++ interface/lauu2.c | 128 ++++ interface/lauum.c | 139 ++++ interface/max.c | 169 +++++ interface/nrm2.c | 93 +++ interface/potf2.c | 128 ++++ interface/potrf.c | 139 ++++ interface/potri.c | 160 +++++ interface/rot.c | 82 +++ interface/rotg.c | 109 +++ interface/rotm.c | 155 +++++ interface/rotmg.c | 199 ++++++ interface/sbmv.c | 215 ++++++ interface/scal.c | 112 +++ interface/sdsdot.c | 101 +++ interface/spmv.c | 207 ++++++ interface/spr.c | 197 ++++++ interface/spr2.c | 203 ++++++ interface/swap.c | 110 +++ interface/symm.c | 422 ++++++++++++ interface/symv.c | 205 ++++++ interface/syr.c | 200 ++++++ interface/syr2.c | 204 ++++++ interface/syr2k.c | 366 ++++++++++ interface/syrk.c | 355 ++++++++++ interface/tbmv.c | 248 +++++++ interface/tbsv.c | 213 ++++++ interface/tpmv.c | 239 +++++++ interface/tpsv.c | 204 ++++++ interface/trmv.c | 243 +++++++ interface/trsm.c | 391 +++++++++++ interface/trsv.c | 208 ++++++ interface/trti2.c | 134 ++++ interface/trtri.c | 153 +++++ interface/zaxpy.c | 122 ++++ interface/zdot.c | 202 ++++++ interface/zgbmv.c | 271 ++++++++ interface/zgemv.c | 259 +++++++ interface/zger.c | 249 +++++++ interface/zgetf2.c | 109 +++ interface/zgetrf.c | 122 ++++ interface/zgetrs.c | 153 +++++ interface/zhbmv.c | 223 
++++++ interface/zhemv.c | 215 ++++++ interface/zher.c | 200 ++++++ interface/zher2.c | 207 ++++++ interface/zhpmv.c | 213 ++++++ interface/zhpr.c | 198 ++++++ interface/zhpr2.c | 207 ++++++ interface/zlaswp.c | 108 +++ interface/zlauu2.c | 129 ++++ interface/zlauum.c | 141 ++++ interface/zpotf2.c | 129 ++++ interface/zpotrf.c | 141 ++++ interface/zpotri.c | 157 +++++ interface/zrot.c | 72 ++ interface/zrotg.c | 115 ++++ interface/zsbmv.c | 157 +++++ interface/zscal.c | 117 ++++ interface/zspmv.c | 154 +++++ interface/zspr.c | 146 ++++ interface/zspr2.c | 149 ++++ interface/zswap.c | 111 +++ interface/zsymv.c | 143 ++++ interface/zsyr.c | 203 ++++++ interface/zsyr2.c | 151 ++++ interface/ztbmv.c | 260 +++++++ interface/ztbsv.c | 219 ++++++ interface/ztpmv.c | 252 +++++++ interface/ztpsv.c | 210 ++++++ interface/ztrmv.c | 255 +++++++ interface/ztrsv.c | 216 ++++++ interface/ztrti2.c | 134 ++++ interface/ztrtri.c | 154 +++++ 91 files changed, 17913 insertions(+) create mode 100644 interface/Makefile create mode 100644 interface/asum.c create mode 100644 interface/axpy.c create mode 100644 interface/copy.c create mode 100644 interface/create create mode 100644 interface/dot.c create mode 100644 interface/dsdot.c create mode 100644 interface/gbmv.c create mode 100644 interface/gemm.c create mode 100644 interface/gemv.c create mode 100644 interface/ger.c create mode 100644 interface/gesv.c create mode 100644 interface/getf2.c create mode 100644 interface/getrf.c create mode 100644 interface/getrs.c create mode 100644 interface/imax.c create mode 100644 interface/larf.c create mode 100644 interface/laswp.c create mode 100644 interface/lauu2.c create mode 100644 interface/lauum.c create mode 100644 interface/max.c create mode 100644 interface/nrm2.c create mode 100644 interface/potf2.c create mode 100644 interface/potrf.c create mode 100644 interface/potri.c create mode 100644 interface/rot.c create mode 100644 interface/rotg.c create mode 100644 interface/rotm.c create 
mode 100644 interface/rotmg.c create mode 100644 interface/sbmv.c create mode 100644 interface/scal.c create mode 100644 interface/sdsdot.c create mode 100644 interface/spmv.c create mode 100644 interface/spr.c create mode 100644 interface/spr2.c create mode 100644 interface/swap.c create mode 100644 interface/symm.c create mode 100644 interface/symv.c create mode 100644 interface/syr.c create mode 100644 interface/syr2.c create mode 100644 interface/syr2k.c create mode 100644 interface/syrk.c create mode 100644 interface/tbmv.c create mode 100644 interface/tbsv.c create mode 100644 interface/tpmv.c create mode 100644 interface/tpsv.c create mode 100644 interface/trmv.c create mode 100644 interface/trsm.c create mode 100644 interface/trsv.c create mode 100644 interface/trti2.c create mode 100644 interface/trtri.c create mode 100644 interface/zaxpy.c create mode 100644 interface/zdot.c create mode 100644 interface/zgbmv.c create mode 100644 interface/zgemv.c create mode 100644 interface/zger.c create mode 100644 interface/zgetf2.c create mode 100644 interface/zgetrf.c create mode 100644 interface/zgetrs.c create mode 100644 interface/zhbmv.c create mode 100644 interface/zhemv.c create mode 100644 interface/zher.c create mode 100644 interface/zher2.c create mode 100644 interface/zhpmv.c create mode 100644 interface/zhpr.c create mode 100644 interface/zhpr2.c create mode 100644 interface/zlaswp.c create mode 100644 interface/zlauu2.c create mode 100644 interface/zlauum.c create mode 100644 interface/zpotf2.c create mode 100644 interface/zpotrf.c create mode 100644 interface/zpotri.c create mode 100644 interface/zrot.c create mode 100644 interface/zrotg.c create mode 100644 interface/zsbmv.c create mode 100644 interface/zscal.c create mode 100644 interface/zspmv.c create mode 100644 interface/zspr.c create mode 100644 interface/zspr2.c create mode 100644 interface/zswap.c create mode 100644 interface/zsymv.c create mode 100644 interface/zsyr.c create mode 100644 
interface/zsyr2.c create mode 100644 interface/ztbmv.c create mode 100644 interface/ztbsv.c create mode 100644 interface/ztpmv.c create mode 100644 interface/ztpsv.c create mode 100644 interface/ztrmv.c create mode 100644 interface/ztrsv.c create mode 100644 interface/ztrti2.c create mode 100644 interface/ztrtri.c (limited to 'interface') diff --git a/interface/Makefile b/interface/Makefile new file mode 100644 index 000000000..5bfc5f389 --- /dev/null +++ b/interface/Makefile @@ -0,0 +1,1942 @@ +TOPDIR = .. +include $(TOPDIR)/Makefile.system + +ifeq ($(ARCH), x86) +SUPPORT_GEMM3M = 1 +endif + +ifeq ($(ARCH), x86_64) +SUPPORT_GEMM3M = 1 +endif + +ifeq ($(ARCH), ia64) +SUPPORT_GEMM3M = 1 +endif + +ifeq ($(ARCH), MIPS) +SUPPORT_GEMM3M = 1 +endif + +ifndef NO_FBLAS + +SBLAS1OBJS = \ + saxpy.$(SUFFIX) sswap.$(SUFFIX) \ + scopy.$(SUFFIX) sscal.$(SUFFIX) \ + sdot.$(SUFFIX) sdsdot.$(SUFFIX) dsdot.$(SUFFIX) \ + sasum.$(SUFFIX) snrm2.$(SUFFIX) \ + smax.$(SUFFIX) samax.$(SUFFIX) ismax.$(SUFFIX) isamax.$(SUFFIX) \ + smin.$(SUFFIX) samin.$(SUFFIX) ismin.$(SUFFIX) isamin.$(SUFFIX) \ + srot.$(SUFFIX) srotg.$(SUFFIX) srotm.$(SUFFIX) srotmg.$(SUFFIX) \ + +SBLAS2OBJS = \ + sgemv.$(SUFFIX) sger.$(SUFFIX) \ + strsv.$(SUFFIX) strmv.$(SUFFIX) ssymv.$(SUFFIX) \ + ssyr.$(SUFFIX) ssyr2.$(SUFFIX) sgbmv.$(SUFFIX) \ + ssbmv.$(SUFFIX) sspmv.$(SUFFIX) \ + sspr.$(SUFFIX) sspr2.$(SUFFIX) \ + stbsv.$(SUFFIX) stbmv.$(SUFFIX) \ + stpsv.$(SUFFIX) stpmv.$(SUFFIX) + +SBLAS3OBJS = \ + sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \ + strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) + +DBLAS1OBJS = \ + daxpy.$(SUFFIX) dswap.$(SUFFIX) \ + dcopy.$(SUFFIX) dscal.$(SUFFIX) \ + ddot.$(SUFFIX) \ + dasum.$(SUFFIX) dnrm2.$(SUFFIX) \ + dmax.$(SUFFIX) damax.$(SUFFIX) idmax.$(SUFFIX) idamax.$(SUFFIX) \ + dmin.$(SUFFIX) damin.$(SUFFIX) idmin.$(SUFFIX) idamin.$(SUFFIX) \ + drot.$(SUFFIX) drotg.$(SUFFIX) drotm.$(SUFFIX) drotmg.$(SUFFIX) \ + +DBLAS2OBJS = \ + dgemv.$(SUFFIX) dger.$(SUFFIX) \ + dtrsv.$(SUFFIX) 
dtrmv.$(SUFFIX) dsymv.$(SUFFIX) \ + dsyr.$(SUFFIX) dsyr2.$(SUFFIX) dgbmv.$(SUFFIX) \ + dsbmv.$(SUFFIX) dspmv.$(SUFFIX) \ + dspr.$(SUFFIX) dspr2.$(SUFFIX) \ + dtbsv.$(SUFFIX) dtbmv.$(SUFFIX) \ + dtpsv.$(SUFFIX) dtpmv.$(SUFFIX) + +DBLAS3OBJS = \ + dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ + dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) + +CBLAS1OBJS = \ + caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ + ccopy.$(SUFFIX) cscal.$(SUFFIX) csscal.$(SUFFIX) \ + cdotc.$(SUFFIX) cdotu.$(SUFFIX) \ + scasum.$(SUFFIX) scnrm2.$(SUFFIX) \ + scamax.$(SUFFIX) icamax.$(SUFFIX) \ + scamin.$(SUFFIX) icamin.$(SUFFIX) \ + csrot.$(SUFFIX) crotg.$(SUFFIX) \ + +CBLAS2OBJS = \ + cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \ + ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) csymv.$(SUFFIX) \ + csyr.$(SUFFIX) csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \ + csbmv.$(SUFFIX) cspmv.$(SUFFIX) \ + cspr.$(SUFFIX) cspr2.$(SUFFIX) \ + ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \ + ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \ + chemv.$(SUFFIX) chbmv.$(SUFFIX) \ + cher.$(SUFFIX) cher2.$(SUFFIX) \ + chpmv.$(SUFFIX) chpr.$(SUFFIX) chpr2.$(SUFFIX) + +CBLAS3OBJS = \ + cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \ + ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \ + chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) + +ZBLAS1OBJS = \ + zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ + zcopy.$(SUFFIX) zscal.$(SUFFIX) zdscal.$(SUFFIX) \ + zdotc.$(SUFFIX) zdotu.$(SUFFIX) \ + dzasum.$(SUFFIX) dznrm2.$(SUFFIX) \ + dzamax.$(SUFFIX) izamax.$(SUFFIX) \ + dzamin.$(SUFFIX) izamin.$(SUFFIX) \ + zdrot.$(SUFFIX) zrotg.$(SUFFIX) \ + +ZBLAS2OBJS = \ + zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \ + ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) zsymv.$(SUFFIX) \ + zsyr.$(SUFFIX) zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \ + zsbmv.$(SUFFIX) zspmv.$(SUFFIX) \ + zspr.$(SUFFIX) zspr2.$(SUFFIX) \ + ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \ + ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \ + zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \ + zher.$(SUFFIX) zher2.$(SUFFIX) \ + zhpmv.$(SUFFIX) zhpr.$(SUFFIX) 
zhpr2.$(SUFFIX) + +ZBLAS3OBJS = \ + zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \ + ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \ + zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) + +ifdef SUPPORT_GEMM3M + +CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) + +ZBLAS3OBJS += zgemm3m.$(SUFFIX) zsymm3m.$(SUFFIX) zhemm3m.$(SUFFIX) + +endif + +ifdef EXPRECISION + +QBLAS1OBJS = \ + qaxpy.$(SUFFIX) qswap.$(SUFFIX) \ + qcopy.$(SUFFIX) qscal.$(SUFFIX) \ + qdot.$(SUFFIX) \ + qasum.$(SUFFIX) qnrm2.$(SUFFIX) \ + qmax.$(SUFFIX) qamax.$(SUFFIX) iqmax.$(SUFFIX) iqamax.$(SUFFIX) \ + qmin.$(SUFFIX) qamin.$(SUFFIX) iqmin.$(SUFFIX) iqamin.$(SUFFIX) \ + qrot.$(SUFFIX) qrotg.$(SUFFIX) qrotm.$(SUFFIX) qrotmg.$(SUFFIX) \ + +QBLAS2OBJS = \ + qgemv.$(SUFFIX) qger.$(SUFFIX) \ + qtrsv.$(SUFFIX) qtrmv.$(SUFFIX) qsymv.$(SUFFIX) \ + qsyr.$(SUFFIX) qsyr2.$(SUFFIX) qgbmv.$(SUFFIX) \ + qsbmv.$(SUFFIX) qspmv.$(SUFFIX) \ + qspr.$(SUFFIX) qspr2.$(SUFFIX) \ + qtbsv.$(SUFFIX) qtbmv.$(SUFFIX) \ + qtpsv.$(SUFFIX) qtpmv.$(SUFFIX) + +QBLAS3OBJS = \ + qgemm.$(SUFFIX) qsymm.$(SUFFIX) qtrmm.$(SUFFIX) \ + qtrsm.$(SUFFIX) qsyrk.$(SUFFIX) qsyr2k.$(SUFFIX) + +XBLAS1OBJS = \ + xaxpy.$(SUFFIX) xaxpyc.$(SUFFIX) xswap.$(SUFFIX) \ + xcopy.$(SUFFIX) xscal.$(SUFFIX) xqscal.$(SUFFIX) \ + xdotc.$(SUFFIX) xdotu.$(SUFFIX) \ + qxasum.$(SUFFIX) qxnrm2.$(SUFFIX) \ + qxamax.$(SUFFIX) ixamax.$(SUFFIX) \ + qxamin.$(SUFFIX) ixamin.$(SUFFIX) \ + xqrot.$(SUFFIX) xrotg.$(SUFFIX) \ + +XBLAS2OBJS = \ + xgemv.$(SUFFIX) xgeru.$(SUFFIX) xgerc.$(SUFFIX) \ + xtrsv.$(SUFFIX) xtrmv.$(SUFFIX) xsymv.$(SUFFIX) \ + xsyr.$(SUFFIX) xsyr2.$(SUFFIX) xgbmv.$(SUFFIX) \ + xsbmv.$(SUFFIX) xspmv.$(SUFFIX) \ + xspr.$(SUFFIX) xspr2.$(SUFFIX) \ + xtbsv.$(SUFFIX) xtbmv.$(SUFFIX) \ + xtpsv.$(SUFFIX) xtpmv.$(SUFFIX) \ + xhemv.$(SUFFIX) xhbmv.$(SUFFIX) \ + xher.$(SUFFIX) xher2.$(SUFFIX) \ + xhpmv.$(SUFFIX) xhpr.$(SUFFIX) xhpr2.$(SUFFIX) + +XBLAS3OBJS = \ + xgemm.$(SUFFIX) xsymm.$(SUFFIX) xtrmm.$(SUFFIX) \ + xtrsm.$(SUFFIX) 
xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ + xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) + +ifdef SUPPORT_GEMM3M + +XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) + +endif + +endif + +ifdef QUAD_PRECISION + +QBLAS1OBJS = \ + qaxpy.$(SUFFIX) qswap.$(SUFFIX) \ + qcopy.$(SUFFIX) qscal.$(SUFFIX) \ + qasum.$(SUFFIX) qnrm2.$(SUFFIX) \ + qmax.$(SUFFIX) qamax.$(SUFFIX) iqmax.$(SUFFIX) iqamax.$(SUFFIX) \ + qmin.$(SUFFIX) qamin.$(SUFFIX) iqmin.$(SUFFIX) iqamin.$(SUFFIX) \ + qrot.$(SUFFIX) qrotg.$(SUFFIX) qrotm.$(SUFFIX) qrotmg.$(SUFFIX) \ + +QBLAS2OBJS = \ + qgemv.$(SUFFIX) qger.$(SUFFIX) \ + qtrsv.$(SUFFIX) qtrmv.$(SUFFIX) qsymv.$(SUFFIX) \ + qsyr.$(SUFFIX) qsyr2.$(SUFFIX) qgbmv.$(SUFFIX) \ + qsbmv.$(SUFFIX) qspmv.$(SUFFIX) \ + qspr.$(SUFFIX) qspr2.$(SUFFIX) \ + qtbsv.$(SUFFIX) qtbmv.$(SUFFIX) \ + qtpsv.$(SUFFIX) qtpmv.$(SUFFIX) + +QBLAS3OBJS = \ + qgemm.$(SUFFIX) qsymm.$(SUFFIX) qtrmm.$(SUFFIX) \ + qtrsm.$(SUFFIX) qsyrk.$(SUFFIX) qsyr2k.$(SUFFIX) + +XBLAS1OBJS = \ + xaxpy.$(SUFFIX) xaxpyc.$(SUFFIX) xswap.$(SUFFIX) \ + xcopy.$(SUFFIX) xscal.$(SUFFIX) xqscal.$(SUFFIX) \ + qxasum.$(SUFFIX) qxnrm2.$(SUFFIX) \ + qxamax.$(SUFFIX) ixamax.$(SUFFIX) \ + qxamin.$(SUFFIX) ixamin.$(SUFFIX) \ + xqrot.$(SUFFIX) xrotg.$(SUFFIX) \ + +XBLAS2OBJS = \ + xgemv.$(SUFFIX) xgeru.$(SUFFIX) xgerc.$(SUFFIX) \ + xtrsv.$(SUFFIX) xtrmv.$(SUFFIX) xsymv.$(SUFFIX) \ + xsyr.$(SUFFIX) xsyr2.$(SUFFIX) xgbmv.$(SUFFIX) \ + xsbmv.$(SUFFIX) xspmv.$(SUFFIX) \ + xspr.$(SUFFIX) xspr2.$(SUFFIX) \ + xtbsv.$(SUFFIX) xtbmv.$(SUFFIX) \ + xtpsv.$(SUFFIX) xtpmv.$(SUFFIX) \ + xhemv.$(SUFFIX) xhbmv.$(SUFFIX) \ + xher.$(SUFFIX) xher2.$(SUFFIX) \ + xhpmv.$(SUFFIX) xhpr.$(SUFFIX) xhpr2.$(SUFFIX) + +XBLAS3OBJS = \ + xgemm.$(SUFFIX) xsymm.$(SUFFIX) xtrmm.$(SUFFIX) \ + xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ + xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) + +ifdef SUPPORT_GEMM3M + +XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) + +endif +endif + +endif + +HPLOBJS = 
dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \ + dgemv.$(SUFFIX) dtrsv.$(SUFFIX) dger.$(SUFFIX) \ + idamax.$(SUFFIX) daxpy.$(SUFFIX) dcopy.$(SUFFIX) dscal.$(SUFFIX) + +CSBLAS1OBJS = \ + cblas_isamax.$(SUFFIX) cblas_sasum.$(SUFFIX) cblas_saxpy.$(SUFFIX) \ + cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \ + cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \ + cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) + +CSBLAS2OBJS = \ + cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \ + cblas_strsv.$(SUFFIX) cblas_ssyr.$(SUFFIX) cblas_ssyr2.$(SUFFIX) cblas_sgbmv.$(SUFFIX) \ + cblas_ssbmv.$(SUFFIX) cblas_sspmv.$(SUFFIX) cblas_sspr.$(SUFFIX) cblas_sspr2.$(SUFFIX) \ + cblas_stbmv.$(SUFFIX) cblas_stbsv.$(SUFFIX) cblas_stpmv.$(SUFFIX) cblas_stpsv.$(SUFFIX) + +CSBLAS3OBJS = \ + cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \ + cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) + +CDBLAS1OBJS = \ + cblas_idamax.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ + cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \ + cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \ + cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) + +CDBLAS2OBJS = \ + cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \ + cblas_dtrsv.$(SUFFIX) cblas_dsyr.$(SUFFIX) cblas_dsyr2.$(SUFFIX) cblas_dgbmv.$(SUFFIX) \ + cblas_dsbmv.$(SUFFIX) cblas_dspmv.$(SUFFIX) cblas_dspr.$(SUFFIX) cblas_dspr2.$(SUFFIX) \ + cblas_dtbmv.$(SUFFIX) cblas_dtbsv.$(SUFFIX) cblas_dtpmv.$(SUFFIX) cblas_dtpsv.$(SUFFIX) + +CDBLAS3OBJS += \ + cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \ + cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) + +CCBLAS1OBJS = \ + cblas_icamax.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \ + cblas_ccopy.$(SUFFIX) \ + 
cblas_cdotc.$(SUFFIX) cblas_cdotu.$(SUFFIX) \ + cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \ + cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \ + cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) + +CCBLAS2OBJS = \ + cblas_cgemv.$(SUFFIX) cblas_cgerc.$(SUFFIX) cblas_cgeru.$(SUFFIX) \ + cblas_cgbmv.$(SUFFIX) cblas_chbmv.$(SUFFIX) cblas_chemv.$(SUFFIX) \ + cblas_cher.$(SUFFIX) cblas_cher2.$(SUFFIX) cblas_chpmv.$(SUFFIX) \ + cblas_chpr.$(SUFFIX) cblas_chpr2.$(SUFFIX) cblas_ctbmv.$(SUFFIX) \ + cblas_ctbsv.$(SUFFIX) cblas_ctpmv.$(SUFFIX) cblas_ctpsv.$(SUFFIX) \ + cblas_ctrmv.$(SUFFIX) cblas_ctrsv.$(SUFFIX) + +CCBLAS3OBJS = \ + cblas_cgemm.$(SUFFIX) cblas_csymm.$(SUFFIX) cblas_ctrmm.$(SUFFIX) cblas_ctrsm.$(SUFFIX) \ + cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ + cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) + +CZBLAS1OBJS = \ + cblas_izamax.$(SUFFIX) cblas_dzasum.$(SUFFIX) cblas_zaxpy.$(SUFFIX) \ + cblas_zcopy.$(SUFFIX) \ + cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \ + cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \ + cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \ + cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) + +CZBLAS2OBJS = \ + cblas_zgemv.$(SUFFIX) cblas_zgerc.$(SUFFIX) cblas_zgeru.$(SUFFIX) \ + cblas_zgbmv.$(SUFFIX) cblas_zhbmv.$(SUFFIX) cblas_zhemv.$(SUFFIX) \ + cblas_zher.$(SUFFIX) cblas_zher2.$(SUFFIX) cblas_zhpmv.$(SUFFIX) \ + cblas_zhpr.$(SUFFIX) cblas_zhpr2.$(SUFFIX) cblas_ztbmv.$(SUFFIX) \ + cblas_ztbsv.$(SUFFIX) cblas_ztpmv.$(SUFFIX) cblas_ztpsv.$(SUFFIX) \ + cblas_ztrmv.$(SUFFIX) cblas_ztrsv.$(SUFFIX) + +CZBLAS3OBJS = \ + cblas_zgemm.$(SUFFIX) cblas_zsymm.$(SUFFIX) cblas_ztrmm.$(SUFFIX) cblas_ztrsm.$(SUFFIX) \ + cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \ + cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX) + +ifndef NO_CBLAS + +CFLAGS += -I. 
+ +SBLAS1OBJS += $(CSBLAS1OBJS) +SBLAS2OBJS += $(CSBLAS2OBJS) +SBLAS3OBJS += $(CSBLAS3OBJS) +DBLAS1OBJS += $(CDBLAS1OBJS) +DBLAS2OBJS += $(CDBLAS2OBJS) +DBLAS3OBJS += $(CDBLAS3OBJS) +CBLAS1OBJS += $(CCBLAS1OBJS) +CBLAS2OBJS += $(CCBLAS2OBJS) +CBLAS3OBJS += $(CCBLAS3OBJS) +ZBLAS1OBJS += $(CZBLAS1OBJS) +ZBLAS2OBJS += $(CZBLAS2OBJS) +ZBLAS3OBJS += $(CZBLAS3OBJS) + +endif + +SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) +DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS) +QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) +CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) +ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) +XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) + +SBLASOBJS += \ + sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \ + spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \ + slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \ + +DBLASOBJS += \ + dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \ + dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \ + dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \ + +QBLASOBJS += \ + qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \ + qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \ + qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ + +CBLASOBJS += \ + cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \ + cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \ + claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \ + +ZBLASOBJS += \ + zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \ + zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \ + zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \ + +XBLASOBJS += \ + xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \ + xpotf2.$(SUFFIX) xpotrf.$(SUFFIX) 
xtrti2.$(SUFFIX) xtrtri.$(SUFFIX) \ + xlaswp.$(SUFFIX) xgetrs.$(SUFFIX) xgesv.$(SUFFIX) xpotri.$(SUFFIX) \ + + +FUNCOBJS = $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) + +ifdef EXPRECISION +FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) +endif + +ifdef QUAD_PRECISION +FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) +endif + +FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) + +include $(TOPDIR)/Makefile.tail + +all :: libs + +ifdef FUNCTION_PROFILE +$(BLASOBJS) $(BLASOBJS_P) : functable.h +$(BLASOBJS) $(BLASOBJS_P) : CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F) + +functable.h : Makefile + ./create $(FUNCALLFILES) > functable.h + +endif + +clean :: + @rm -f functable.h + +level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) + $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ + +level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) + $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ + +level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) + $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ + +$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ +$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : CFLAGS += -DCBLAS + +srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +drot.$(SUFFIX) drot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qrot.$(SUFFIX) qrot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +csrot.$(SUFFIX) csrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zdrot.$(SUFFIX) zdrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xqrot.$(SUFFIX) xqrot.$(PSUFFIX) : zrot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +srotm.$(SUFFIX) srotm.$(PSUFFIX): rotm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +drotm.$(SUFFIX) drotm.$(PSUFFIX): rotm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qrotm.$(SUFFIX) qrotm.$(PSUFFIX): rotm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +srotmg.$(SUFFIX) 
srotmg.$(PSUFFIX): rotmg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +drotmg.$(SUFFIX) drotmg.$(PSUFFIX): rotmg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qrotmg.$(SUFFIX) qrotmg.$(PSUFFIX): rotmg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +srotg.$(SUFFIX) srotg.$(PSUFFIX): rotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +drotg.$(SUFFIX) drotg.$(PSUFFIX): rotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qrotg.$(SUFFIX) qrotg.$(PSUFFIX): rotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xrotg.$(SUFFIX) xrotg.$(PSUFFIX): zrotg.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sasum.$(SUFFIX) sasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dasum.$(SUFFIX) dasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qasum.$(SUFFIX) qasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +scasum.$(SUFFIX) scasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dzasum.$(SUFFIX) dzasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qxasum.$(SUFFIX) qxasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +snrm2.$(SUFFIX) snrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dnrm2.$(SUFFIX) dnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qnrm2.$(SUFFIX) qnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +scnrm2.$(SUFFIX) scnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dznrm2.$(SUFFIX) dznrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qxnrm2.$(SUFFIX) qxnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +samax.$(SUFFIX) samax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +damax.$(SUFFIX) damax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +qamax.$(SUFFIX) qamax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +scamax.$(SUFFIX) scamax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c 
-DUSE_ABS -UUSE_MIN $< -o $(@F) + +dzamax.$(SUFFIX) dzamax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +qxamax.$(SUFFIX) qxamax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +samin.$(SUFFIX) samin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +damin.$(SUFFIX) damin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +qamin.$(SUFFIX) qamin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +scamin.$(SUFFIX) scamin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +dzamin.$(SUFFIX) dzamin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +qxamin.$(SUFFIX) qxamin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +smax.$(SUFFIX) smax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +dmax.$(SUFFIX) dmax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +qmax.$(SUFFIX) qmax.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +smin.$(SUFFIX) smin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +dmin.$(SUFFIX) dmin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +qmin.$(SUFFIX) qmin.$(PSUFFIX) : max.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +isamax.$(SUFFIX) isamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +idamax.$(SUFFIX) idamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +iqamax.$(SUFFIX) iqamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +icamax.$(SUFFIX) icamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +izamax.$(SUFFIX) izamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +ixamax.$(SUFFIX) ixamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + 
+isamin.$(SUFFIX) isamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +idamin.$(SUFFIX) idamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +iqamin.$(SUFFIX) iqamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +icamin.$(SUFFIX) icamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +izamin.$(SUFFIX) izamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +ixamin.$(SUFFIX) ixamin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -DUSE_ABS -DUSE_MIN $< -o $(@F) + +ismax.$(SUFFIX) ismax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +idmax.$(SUFFIX) idmax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +iqmax.$(SUFFIX) iqmax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +ismin.$(SUFFIX) ismin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +idmin.$(SUFFIX) idmin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +iqmin.$(SUFFIX) iqmin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +sdsdot.$(SUFFIX) sdsdot.$(PSUFFIX) : sdsdot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dsdot.$(SUFFIX) dsdot.$(PSUFFIX) : dsdot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +sdot.$(SUFFIX) sdot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +ddot.$(SUFFIX) ddot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qdot.$(SUFFIX) qdot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cdotu.$(SUFFIX) cdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) + +cdotc.$(SUFFIX) cdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +zdotu.$(SUFFIX) zdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) + +zdotc.$(SUFFIX) zdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +xdotu.$(SUFFIX) xdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -UCONJ $< -o $(@F) + 
+xdotc.$(SUFFIX) xdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +caxpy.$(SUFFIX) caxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zaxpy.$(SUFFIX) zaxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xaxpy.$(SUFFIX) xaxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +caxpyc.$(SUFFIX) caxpyc.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +zaxpyc.$(SUFFIX) zaxpyc.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +xaxpyc.$(SUFFIX) xaxpyc.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -c -DCONJ $< -o $(@F) + +sscal.$(SUFFIX) sscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dscal.$(SUFFIX) dscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qscal.$(SUFFIX) qscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cscal.$(SUFFIX) cscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zscal.$(SUFFIX) zscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xscal.$(SUFFIX) xscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +csscal.$(SUFFIX) csscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) + +zdscal.$(SUFFIX) zdscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) + +xqscal.$(SUFFIX) xqscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -c -DSSCAL $< -o $(@F) + +scopy.$(SUFFIX) scopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dcopy.$(SUFFIX) dcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qcopy.$(SUFFIX) qcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +ccopy.$(SUFFIX) ccopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zcopy.$(SUFFIX) zcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xcopy.$(SUFFIX) xcopy.$(PSUFFIX) : copy.c + $(CC) 
$(CFLAGS) -c $< -o $(@F) + +sswap.$(SUFFIX) sswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dswap.$(SUFFIX) dswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qswap.$(SUFFIX) qswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cswap.$(SUFFIX) cswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +zswap.$(SUFFIX) zswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +xswap.$(SUFFIX) xswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +sger.$(SUFFIX) sger.$(PSUFFIX) : ger.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dger.$(SUFFIX) dger.$(PSUFFIX) : ger.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qger.$(SUFFIX) qger.$(PSUFFIX) : ger.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgeru.$(SUFFIX) cgeru.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) + +cgerc.$(SUFFIX) cgerc.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) + +zgeru.$(SUFFIX) zgeru.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) + +zgerc.$(SUFFIX) zgerc.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) + +xgeru.$(SUFFIX) xgeru.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -UCONJ $< -o $(@F) + +xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c + $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) + +sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +dgemv.$(SUFFIX) dgemv.$(PSUFFIX): gemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +qgemv.$(SUFFIX) qgemv.$(PSUFFIX): gemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +cgemv.$(SUFFIX) cgemv.$(PSUFFIX): zgemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +zgemv.$(SUFFIX) zgemv.$(PSUFFIX): zgemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +xgemv.$(SUFFIX) xgemv.$(PSUFFIX): zgemv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +strsv.$(SUFFIX) strsv.$(PSUFFIX) : trsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrsv.$(SUFFIX) dtrsv.$(PSUFFIX) : trsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrsv.$(SUFFIX) qtrsv.$(PSUFFIX) : trsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrsv.$(SUFFIX) ctrsv.$(PSUFFIX) : ztrsv.c + $(CC) -c $(CFLAGS) $< 
-o $(@F) + +ztrsv.$(SUFFIX) ztrsv.$(PSUFFIX) : ztrsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrsv.$(SUFFIX) xtrsv.$(PSUFFIX) : ztrsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strmv.$(SUFFIX) strmv.$(PSUFFIX) : trmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrmv.$(SUFFIX) dtrmv.$(PSUFFIX) : trmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrmv.$(SUFFIX) qtrmv.$(PSUFFIX) : trmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrmv.$(SUFFIX) ctrmv.$(PSUFFIX) : ztrmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrmv.$(SUFFIX) ztrmv.$(PSUFFIX) : ztrmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrmv.$(SUFFIX) xtrmv.$(PSUFFIX) : ztrmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssymv.$(SUFFIX) ssymv.$(PSUFFIX) : symv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsymv.$(SUFFIX) dsymv.$(PSUFFIX) : symv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsymv.$(SUFFIX) qsymv.$(PSUFFIX) : symv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csymv.$(SUFFIX) csymv.$(PSUFFIX) : zsymv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsymv.$(SUFFIX) zsymv.$(PSUFFIX) : zsymv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsymv.$(SUFFIX) xsymv.$(PSUFFIX) : zsymv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssyr.$(SUFFIX) ssyr.$(PSUFFIX) : syr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsyr.$(SUFFIX) dsyr.$(PSUFFIX) : syr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsyr.$(SUFFIX) qsyr.$(PSUFFIX) : syr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csyr.$(SUFFIX) csyr.$(PSUFFIX) : zsyr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsyr.$(SUFFIX) zsyr.$(PSUFFIX) : zsyr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsyr.$(SUFFIX) xsyr.$(PSUFFIX) : zsyr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssyr2.$(SUFFIX) ssyr2.$(PSUFFIX) : syr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsyr2.$(SUFFIX) dsyr2.$(PSUFFIX) : syr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsyr2.$(SUFFIX) qsyr2.$(PSUFFIX) : syr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csyr2.$(SUFFIX) csyr2.$(PSUFFIX) : zsyr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsyr2.$(SUFFIX) zsyr2.$(PSUFFIX) : zsyr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsyr2.$(SUFFIX) xsyr2.$(PSUFFIX) : zsyr2.c 
+ $(CC) -c $(CFLAGS) $< -o $(@F) + +sgbmv.$(SUFFIX) sgbmv.$(PSUFFIX): gbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +dgbmv.$(SUFFIX) dgbmv.$(PSUFFIX): gbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +qgbmv.$(SUFFIX) qgbmv.$(PSUFFIX): gbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +cgbmv.$(SUFFIX) cgbmv.$(PSUFFIX): zgbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +zgbmv.$(SUFFIX) zgbmv.$(PSUFFIX): zgbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +xgbmv.$(SUFFIX) xgbmv.$(PSUFFIX): zgbmv.c + $(CC) -c $(CFLAGS) -o $(@F) $< + +ssbmv.$(SUFFIX) ssbmv.$(PSUFFIX) : sbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsbmv.$(SUFFIX) dsbmv.$(PSUFFIX) : sbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsbmv.$(SUFFIX) qsbmv.$(PSUFFIX) : sbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csbmv.$(SUFFIX) csbmv.$(PSUFFIX) : zsbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsbmv.$(SUFFIX) zsbmv.$(PSUFFIX) : zsbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsbmv.$(SUFFIX) xsbmv.$(PSUFFIX) : zsbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sspmv.$(SUFFIX) sspmv.$(PSUFFIX) : spmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dspmv.$(SUFFIX) dspmv.$(PSUFFIX) : spmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qspmv.$(SUFFIX) qspmv.$(PSUFFIX) : spmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cspmv.$(SUFFIX) cspmv.$(PSUFFIX) : zspmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zspmv.$(SUFFIX) zspmv.$(PSUFFIX) : zspmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xspmv.$(SUFFIX) xspmv.$(PSUFFIX) : zspmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sspr.$(SUFFIX) sspr.$(PSUFFIX) : spr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dspr.$(SUFFIX) dspr.$(PSUFFIX) : spr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qspr.$(SUFFIX) qspr.$(PSUFFIX) : spr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cspr.$(SUFFIX) cspr.$(PSUFFIX) : zspr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zspr.$(SUFFIX) zspr.$(PSUFFIX) : zspr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xspr.$(SUFFIX) xspr.$(PSUFFIX) : zspr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sspr2.$(SUFFIX) sspr2.$(PSUFFIX) : spr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dspr2.$(SUFFIX) 
dspr2.$(PSUFFIX) : spr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qspr2.$(SUFFIX) qspr2.$(PSUFFIX) : spr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cspr2.$(SUFFIX) cspr2.$(PSUFFIX) : zspr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zspr2.$(SUFFIX) zspr2.$(PSUFFIX) : zspr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xspr2.$(SUFFIX) xspr2.$(PSUFFIX) : zspr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +stbmv.$(SUFFIX) stbmv.$(PSUFFIX) : tbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtbmv.$(SUFFIX) dtbmv.$(PSUFFIX) : tbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtbmv.$(SUFFIX) qtbmv.$(PSUFFIX) : tbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctbmv.$(SUFFIX) ctbmv.$(PSUFFIX) : ztbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztbmv.$(SUFFIX) ztbmv.$(PSUFFIX) : ztbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtbmv.$(SUFFIX) xtbmv.$(PSUFFIX) : ztbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +stbsv.$(SUFFIX) stbsv.$(PSUFFIX) : tbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtbsv.$(SUFFIX) dtbsv.$(PSUFFIX) : tbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtbsv.$(SUFFIX) qtbsv.$(PSUFFIX) : tbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctbsv.$(SUFFIX) ctbsv.$(PSUFFIX) : ztbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztbsv.$(SUFFIX) ztbsv.$(PSUFFIX) : ztbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtbsv.$(SUFFIX) xtbsv.$(PSUFFIX) : ztbsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +stpsv.$(SUFFIX) stpsv.$(PSUFFIX) : tpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtpsv.$(SUFFIX) dtpsv.$(PSUFFIX) : tpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtpsv.$(SUFFIX) qtpsv.$(PSUFFIX) : tpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctpsv.$(SUFFIX) ctpsv.$(PSUFFIX) : ztpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztpsv.$(SUFFIX) ztpsv.$(PSUFFIX) : ztpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtpsv.$(SUFFIX) xtpsv.$(PSUFFIX) : ztpsv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +stpmv.$(SUFFIX) stpmv.$(PSUFFIX) : tpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtpmv.$(SUFFIX) dtpmv.$(PSUFFIX) : tpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtpmv.$(SUFFIX) qtpmv.$(PSUFFIX) : tpmv.c + $(CC) -c 
$(CFLAGS) $< -o $(@F) + +ctpmv.$(SUFFIX) ctpmv.$(PSUFFIX) : ztpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztpmv.$(SUFFIX) ztpmv.$(PSUFFIX) : ztpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtpmv.$(SUFFIX) xtpmv.$(PSUFFIX) : ztpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chemv.$(SUFFIX) chemv.$(PSUFFIX) : zhemv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhemv.$(SUFFIX) zhemv.$(PSUFFIX) : zhemv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhemv.$(SUFFIX) xhemv.$(PSUFFIX) : zhemv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chbmv.$(SUFFIX) chbmv.$(PSUFFIX) : zhbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhbmv.$(SUFFIX) zhbmv.$(PSUFFIX) : zhbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhbmv.$(SUFFIX) xhbmv.$(PSUFFIX) : zhbmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cher.$(SUFFIX) cher.$(PSUFFIX) : zher.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zher.$(SUFFIX) zher.$(PSUFFIX) : zher.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xher.$(SUFFIX) xher.$(PSUFFIX) : zher.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cher2.$(SUFFIX) cher2.$(PSUFFIX) : zher2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zher2.$(SUFFIX) zher2.$(PSUFFIX) : zher2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xher2.$(SUFFIX) xher2.$(PSUFFIX) : zher2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chpmv.$(SUFFIX) chpmv.$(PSUFFIX) : zhpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhpmv.$(SUFFIX) zhpmv.$(PSUFFIX) : zhpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhpmv.$(SUFFIX) xhpmv.$(PSUFFIX) : zhpmv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chpr.$(SUFFIX) chpr.$(PSUFFIX) : zhpr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhpr.$(SUFFIX) zhpr.$(PSUFFIX) : zhpr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhpr.$(SUFFIX) xhpr.$(PSUFFIX) : zhpr.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chpr2.$(SUFFIX) chpr2.$(PSUFFIX) : zhpr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zhpr2.$(SUFFIX) zhpr2.$(PSUFFIX) : zhpr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + 
+dgemm.$(SUFFIX) dgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgemm.$(SUFFIX) qgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgemm.$(SUFFIX) cgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgemm.$(SUFFIX) zgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgemm.$(SUFFIX) xgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssymm.$(SUFFIX) ssymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsymm.$(SUFFIX) dsymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsymm.$(SUFFIX) qsymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csymm.$(SUFFIX) csymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsymm.$(SUFFIX) zsymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsymm.$(SUFFIX) xsymm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strmm.$(SUFFIX) strmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +dtrmm.$(SUFFIX) dtrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +qtrmm.$(SUFFIX) qtrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +ctrmm.$(SUFFIX) ctrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +ztrmm.$(SUFFIX) ztrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +xtrmm.$(SUFFIX) xtrmm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) -DTRMM $< -o $(@F) + +strsm.$(SUFFIX) strsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrsm.$(SUFFIX) dtrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrsm.$(SUFFIX) qtrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrsm.$(SUFFIX) ctrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrsm.$(SUFFIX) ztrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrsm.$(SUFFIX) xtrsm.$(PSUFFIX) : trsm.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssyrk.$(SUFFIX) ssyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsyrk.$(SUFFIX) 
dsyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsyrk.$(SUFFIX) qsyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csyrk.$(SUFFIX) csyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsyrk.$(SUFFIX) zsyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsyrk.$(SUFFIX) xsyrk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ssyr2k.$(SUFFIX) ssyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dsyr2k.$(SUFFIX) dsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qsyr2k.$(SUFFIX) qsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +csyr2k.$(SUFFIX) csyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zsyr2k.$(SUFFIX) zsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xsyr2k.$(SUFFIX) xsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +chemm.$(SUFFIX) chemm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +zhemm.$(SUFFIX) zhemm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +xhemm.$(SUFFIX) xhemm.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +cherk.$(SUFFIX) cherk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +zherk.$(SUFFIX) zherk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +xherk.$(SUFFIX) xherk.$(PSUFFIX) : syrk.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +cher2k.$(SUFFIX) cher2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +zher2k.$(SUFFIX) zher2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +xher2k.$(SUFFIX) xher2k.$(PSUFFIX) : syr2k.c + $(CC) -c $(CFLAGS) -DHEMM $< -o $(@F) + +cgemm3m.$(SUFFIX) cgemm3m.$(PSUFFIX) : gemm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +zgemm3m.$(SUFFIX) zgemm3m.$(PSUFFIX) : gemm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +xgemm3m.$(SUFFIX) xgemm3m.$(PSUFFIX) : gemm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +csymm3m.$(SUFFIX) csymm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + 
+zsymm3m.$(SUFFIX) zsymm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +xsymm3m.$(SUFFIX) xsymm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M $< -o $(@F) + +chemm3m.$(SUFFIX) chemm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o $(@F) + +zhemm3m.$(SUFFIX) zhemm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o $(@F) + +xhemm3m.$(SUFFIX) xhemm3m.$(PSUFFIX) : symm.c + $(CC) -c $(CFLAGS) -DGEMM3M -DHEMM $< -o $(@F) + +cblas_isamax.$(SUFFIX) cblas_isamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_idamax.$(SUFFIX) cblas_idamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_icamax.$(SUFFIX) cblas_icamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_izamax.$(SUFFIX) cblas_izamax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_ismax.$(SUFFIX) cblas_ismax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_idmax.$(SUFFIX) cblas_idmax.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -UUSE_MIN $< -o $(@F) + +cblas_ismin.$(SUFFIX) cblas_ismin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_idmin.$(SUFFIX) cblas_idmin.$(PSUFFIX) : imax.c + $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) + +cblas_sasum.$(SUFFIX) cblas_sasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dasum.$(SUFFIX) cblas_dasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_scasum.$(SUFFIX) cblas_scasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dzasum.$(SUFFIX) cblas_dzasum.$(PSUFFIX) : asum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_sdsdot.$(SUFFIX) cblas_sdsdot.$(PSUFFIX) : sdsdot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c + $(CC) $(CFLAGS) 
-DCBLAS -c $< -o $(@F) + +cblas_sdot.$(SUFFIX) cblas_sdot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_ddot.$(SUFFIX) cblas_ddot.$(PSUFFIX) : dot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_cdotu.$(SUFFIX) cblas_cdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -c -UCONJ $< -o $(@F) + +cblas_cdotc.$(SUFFIX) cblas_cdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) + +cblas_zdotu.$(SUFFIX) cblas_zdotu.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -c -UCONJ $< -o $(@F) + +cblas_zdotc.$(SUFFIX) cblas_zdotc.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) + +cblas_cdotu_sub.$(SUFFIX) cblas_cdotu_sub.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -UCONJ $< -o $(@F) + +cblas_cdotc_sub.$(SUFFIX) cblas_cdotc_sub.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -DCONJ $< -o $(@F) + +cblas_zdotu_sub.$(SUFFIX) cblas_zdotu_sub.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -UCONJ $< -o $(@F) + +cblas_zdotc_sub.$(SUFFIX) cblas_zdotc_sub.$(PSUFFIX) : zdot.c + $(CC) $(CFLAGS) -DCBLAS -DFORCE_USE_STACK -c -DCONJ $< -o $(@F) + +cblas_snrm2.$(SUFFIX) cblas_snrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dnrm2.$(SUFFIX) cblas_dnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_scnrm2.$(SUFFIX) cblas_scnrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dznrm2.$(SUFFIX) cblas_dznrm2.$(PSUFFIX) : nrm2.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zaxpy.$(SUFFIX) cblas_zaxpy.$(PSUFFIX) : zaxpy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_scopy.$(SUFFIX) cblas_scopy.$(PSUFFIX) : 
copy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dcopy.$(SUFFIX) cblas_dcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_ccopy.$(SUFFIX) cblas_ccopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zcopy.$(SUFFIX) cblas_zcopy.$(PSUFFIX) : copy.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_sswap.$(SUFFIX) cblas_sswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dswap.$(SUFFIX) cblas_dswap.$(PSUFFIX) : swap.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_cswap.$(SUFFIX) cblas_cswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zswap.$(SUFFIX) cblas_zswap.$(PSUFFIX) : zswap.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_srot.$(SUFFIX) cblas_srot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_drot.$(SUFFIX) cblas_drot.$(PSUFFIX) : rot.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_srotg.$(SUFFIX) cblas_srotg.$(PSUFFIX): rotg.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_drotg.$(SUFFIX) cblas_drotg.$(PSUFFIX): rotg.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_srotm.$(SUFFIX) cblas_srotm.$(PSUFFIX): rotm.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_drotm.$(SUFFIX) cblas_drotm.$(PSUFFIX): rotm.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_srotmg.$(SUFFIX) cblas_srotmg.$(PSUFFIX): rotmg.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_drotmg.$(SUFFIX) cblas_drotmg.$(PSUFFIX): rotmg.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_sscal.$(SUFFIX) cblas_sscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dscal.$(SUFFIX) cblas_dscal.$(PSUFFIX) : scal.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_cscal.$(SUFFIX) cblas_cscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_zscal.$(SUFFIX) cblas_zscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_csscal.$(SUFFIX) cblas_csscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -DCBLAS -c 
-DSSCAL $< -o $(@F) + +cblas_zdscal.$(SUFFIX) cblas_zdscal.$(PSUFFIX) : zscal.c + $(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F) + +cblas_sgemv.$(SUFFIX) cblas_sgemv.$(PSUFFIX): gemv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_dgemv.$(SUFFIX) cblas_dgemv.$(PSUFFIX): gemv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_cgemv.$(SUFFIX) cblas_cgemv.$(PSUFFIX): zgemv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_zgemv.$(SUFFIX) cblas_zgemv.$(PSUFFIX): zgemv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_sger.$(SUFFIX) cblas_sger.$(PSUFFIX) : ger.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dger.$(SUFFIX) cblas_dger.$(PSUFFIX) : ger.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_cgeru.$(SUFFIX) cblas_cgeru.$(PSUFFIX) : zger.c + $(CC) -DCBLAS -c $(CFLAGS) -UCONJ $< -o $(@F) + +cblas_cgerc.$(SUFFIX) cblas_cgerc.$(PSUFFIX) : zger.c + $(CC) -DCBLAS -c $(CFLAGS) -DCONJ $< -o $(@F) + +cblas_zgeru.$(SUFFIX) cblas_zgeru.$(PSUFFIX) : zger.c + $(CC) -DCBLAS -c $(CFLAGS) -UCONJ $< -o $(@F) + +cblas_zgerc.$(SUFFIX) cblas_zgerc.$(PSUFFIX) : zger.c + $(CC) -DCBLAS -c $(CFLAGS) -DCONJ $< -o $(@F) + +cblas_strsv.$(SUFFIX) cblas_strsv.$(PSUFFIX) : trsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtrsv.$(SUFFIX) cblas_dtrsv.$(PSUFFIX) : trsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctrsv.$(SUFFIX) cblas_ctrsv.$(PSUFFIX) : ztrsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztrsv.$(SUFFIX) cblas_ztrsv.$(PSUFFIX) : ztrsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_strmv.$(SUFFIX) cblas_strmv.$(PSUFFIX) : trmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtrmv.$(SUFFIX) cblas_dtrmv.$(PSUFFIX) : trmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctrmv.$(SUFFIX) cblas_ctrmv.$(PSUFFIX) : ztrmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztrmv.$(SUFFIX) cblas_ztrmv.$(PSUFFIX) : ztrmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssyr.$(SUFFIX) cblas_ssyr.$(PSUFFIX) : syr.c + $(CC) -DCBLAS -c $(CFLAGS) 
$< -o $(@F) + +cblas_dsyr.$(SUFFIX) cblas_dsyr.$(PSUFFIX) : syr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_cher.$(SUFFIX) cblas_cher.$(PSUFFIX) : zher.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zher.$(SUFFIX) cblas_zher.$(PSUFFIX) : zher.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssyr2.$(SUFFIX) cblas_ssyr2.$(PSUFFIX) : syr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsyr2.$(SUFFIX) cblas_dsyr2.$(PSUFFIX) : syr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_cher2.$(SUFFIX) cblas_cher2.$(PSUFFIX) : zher2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zher2.$(SUFFIX) cblas_zher2.$(PSUFFIX) : zher2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sgbmv.$(SUFFIX) cblas_sgbmv.$(PSUFFIX): gbmv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_dgbmv.$(SUFFIX) cblas_dgbmv.$(PSUFFIX): gbmv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_cgbmv.$(SUFFIX) cblas_cgbmv.$(PSUFFIX): zgbmv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_zgbmv.$(SUFFIX) cblas_zgbmv.$(PSUFFIX): zgbmv.c + $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< + +cblas_ssbmv.$(SUFFIX) cblas_ssbmv.$(PSUFFIX) : sbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsbmv.$(SUFFIX) cblas_dsbmv.$(PSUFFIX) : sbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chbmv.$(SUFFIX) cblas_chbmv.$(PSUFFIX) : zhbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhbmv.$(SUFFIX) cblas_zhbmv.$(PSUFFIX) : zhbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sspmv.$(SUFFIX) cblas_sspmv.$(PSUFFIX) : spmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dspmv.$(SUFFIX) cblas_dspmv.$(PSUFFIX) : spmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sspr.$(SUFFIX) cblas_sspr.$(PSUFFIX) : spr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dspr.$(SUFFIX) cblas_dspr.$(PSUFFIX) : spr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chpr.$(SUFFIX) cblas_chpr.$(PSUFFIX) : zhpr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhpr.$(SUFFIX) 
cblas_zhpr.$(PSUFFIX) : zhpr.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sspr2.$(SUFFIX) cblas_sspr2.$(PSUFFIX) : spr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dspr2.$(SUFFIX) cblas_dspr2.$(PSUFFIX) : spr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chpr2.$(SUFFIX) cblas_chpr2.$(PSUFFIX) : zhpr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhpr2.$(SUFFIX) cblas_zhpr2.$(PSUFFIX) : zhpr2.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_stbmv.$(SUFFIX) cblas_stbmv.$(PSUFFIX) : tbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtbmv.$(SUFFIX) cblas_dtbmv.$(PSUFFIX) : tbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctbmv.$(SUFFIX) cblas_ctbmv.$(PSUFFIX) : ztbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztbmv.$(SUFFIX) cblas_ztbmv.$(PSUFFIX) : ztbmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_stbsv.$(SUFFIX) cblas_stbsv.$(PSUFFIX) : tbsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtbsv.$(SUFFIX) cblas_dtbsv.$(PSUFFIX) : tbsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctbsv.$(SUFFIX) cblas_ctbsv.$(PSUFFIX) : ztbsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztbsv.$(SUFFIX) cblas_ztbsv.$(PSUFFIX) : ztbsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_stpmv.$(SUFFIX) cblas_stpmv.$(PSUFFIX) : tpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtpmv.$(SUFFIX) cblas_dtpmv.$(PSUFFIX) : tpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctpmv.$(SUFFIX) cblas_ctpmv.$(PSUFFIX) : ztpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztpmv.$(SUFFIX) cblas_ztpmv.$(PSUFFIX) : ztpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chpmv.$(SUFFIX) cblas_chpmv.$(PSUFFIX) : zhpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhpmv.$(SUFFIX) cblas_zhpmv.$(PSUFFIX) : zhpmv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_stpsv.$(SUFFIX) cblas_stpsv.$(PSUFFIX) : tpsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtpsv.$(SUFFIX) cblas_dtpsv.$(PSUFFIX) : 
tpsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctpsv.$(SUFFIX) cblas_ctpsv.$(PSUFFIX) : ztpsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztpsv.$(SUFFIX) cblas_ztpsv.$(PSUFFIX) : ztpsv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssymv.$(SUFFIX) cblas_ssymv.$(PSUFFIX) : symv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsymv.$(SUFFIX) cblas_dsymv.$(PSUFFIX) : symv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chemv.$(SUFFIX) cblas_chemv.$(PSUFFIX) : zhemv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zhemv.$(SUFFIX) cblas_zhemv.$(PSUFFIX) : zhemv.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_cgemm.$(SUFFIX) cblas_cgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zgemm.$(SUFFIX) cblas_zgemm.$(PSUFFIX) : gemm.c ../param.h + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssymm.$(SUFFIX) cblas_ssymm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsymm.$(SUFFIX) cblas_dsymm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_csymm.$(SUFFIX) cblas_csymm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zsymm.$(SUFFIX) cblas_zsymm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssyrk.$(SUFFIX) cblas_ssyrk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsyrk.$(SUFFIX) cblas_dsyrk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_csyrk.$(SUFFIX) cblas_csyrk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zsyrk.$(SUFFIX) cblas_zsyrk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ssyr2k.$(SUFFIX) cblas_ssyr2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dsyr2k.$(SUFFIX) 
cblas_dsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_csyr2k.$(SUFFIX) cblas_csyr2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_zsyr2k.$(SUFFIX) cblas_zsyr2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_strmm.$(SUFFIX) cblas_strmm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) + +cblas_dtrmm.$(SUFFIX) cblas_dtrmm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) + +cblas_ctrmm.$(SUFFIX) cblas_ctrmm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) + +cblas_ztrmm.$(SUFFIX) cblas_ztrmm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) -DTRMM $< -o $(@F) + +cblas_strsm.$(SUFFIX) cblas_strsm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_dtrsm.$(SUFFIX) cblas_dtrsm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ctrsm.$(SUFFIX) cblas_ctrsm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_ztrsm.$(SUFFIX) cblas_ztrsm.$(PSUFFIX) : trsm.c + $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) + +cblas_chemm.$(SUFFIX) cblas_chemm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_zhemm.$(SUFFIX) cblas_zhemm.$(PSUFFIX) : symm.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_cherk.$(SUFFIX) cblas_cherk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_zherk.$(SUFFIX) cblas_zherk.$(PSUFFIX) : syrk.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c + $(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F) + +sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : getf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : getf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgetf2.$(SUFFIX) 
cgetf2.$(PSUFFIX) : zgetf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : zgetf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : getrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : getrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : zgetrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : zgetrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qlauu2.$(SUFFIX) qlauu2.$(PSUFFIX) : lauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : zlauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : zlauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xlauu2.$(SUFFIX) xlauu2.$(PSUFFIX) : zlauu2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +slauum.$(SUFFIX) slauum.$(PSUFFIX) : lauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qlauum.$(SUFFIX) qlauum.$(PSUFFIX) : lauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +clauum.$(SUFFIX) clauum.$(PSUFFIX) : zlauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : potf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : potf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : zpotf2.c + $(CC) -c 
$(CFLAGS) $< -o $(@F) + +zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : zpotf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : potrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : potrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : zpotrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : zpotrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strti2.$(SUFFIX) strti2.$(PSUFFIX) : trti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : trti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrti2.$(SUFFIX) qtrti2.$(PSUFFIX) : trti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : ztrti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : ztrti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrti2.$(SUFFIX) xtrti2.$(PSUFFIX) : ztrti2.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +strtri.$(SUFFIX) strtri.$(PSUFFIX) : trtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : trtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qtrtri.$(SUFFIX) qtrtri.$(PSUFFIX) : trtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : ztrtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : laswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : laswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +claswp.$(SUFFIX) claswp.$(PSUFFIX) : zlaswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zlaswp.$(SUFFIX) 
zlaswp.$(PSUFFIX) : zlaswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : getrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : getrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : zgetrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : zgetrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +spotri.$(SUFFIX) spotri.$(PSUFFIX) : potri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : potri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qpotri.$(SUFFIX) qpotri.$(PSUFFIX) : potri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : zpotri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : zpotri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xpotri.$(SUFFIX) xpotri.$(PSUFFIX) : zpotri.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +slarf.$(SUFFIX) slarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dlarf.$(SUFFIX) dlarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +qlarf.$(SUFFIX) qlarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +clarf.$(SUFFIX) clarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zlarf.$(SUFFIX) zlarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +xlarf.$(SUFFIX) 
xlarf.$(PSUFFIX) : larf.c + $(CC) -c $(CFLAGS) $< -o $(@F) + diff --git a/interface/asum.c b/interface/asum.c new file mode 100644 index 000000000..634836e28 --- /dev/null +++ b/interface/asum.c @@ -0,0 +1,93 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. 
*/ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + FLOATRET ret; + + PRINT_DEBUG_NAME; + + if (n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + ret = (FLOATRET)ASUM_K(n, x, incx); + + FUNCTION_PROFILE_END(COMPSIZE, n, n); + + IDEBUG_END; + + return ret; +} + +#else + +FLOAT CNAME(blasint n, FLOAT *x, blasint incx){ + + FLOAT ret; + + PRINT_DEBUG_CNAME; + + if (n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + ret = ASUM_K(n, x, incx); + + FUNCTION_PROFILE_END(COMPSIZE, n, n); + + IDEBUG_END; + + return ret; +} + +#endif diff --git a/interface/axpy.c b/interface/axpy.c new file mode 100644 index 000000000..03b981985 --- /dev/null +++ b/interface/axpy.c @@ -0,0 +1,112 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + BLASLONG incy = *INCY; + FLOAT alpha = *ALPHA; + +#else + +void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ + +#endif + +#ifdef SMP + int mode, nthreads; +#endif + +#ifndef CBLAS + PRINT_DEBUG_NAME; +#else + PRINT_DEBUG_CNAME; +#endif + + if (n <= 0) return; + + if (alpha == ZERO) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx; + if (incy < 0) y -= (n - 1) * incy; + +#ifdef SMP + nthreads = num_cpu_avail(1); + + if (nthreads == 1) { +#endif + + AXPYU_K(n, 0, 0, alpha, x, incx, y, incy, NULL, 0); + +#ifdef SMP + } else { + +#ifdef XDOUBLE + mode = BLAS_XDOUBLE | BLAS_REAL; +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | BLAS_REAL; +#else + mode = BLAS_SINGLE | BLAS_REAL; +#endif + + blas_level1_thread(mode, n, 0, 0, &alpha, + x, incx, y, incy, NULL, 0, (void *)AXPYU_K, nthreads); + + } +#endif + + FUNCTION_PROFILE_END(1, 2 * n, 2 * n); + + IDEBUG_END; + + return; + +} diff --git a/interface/copy.c b/interface/copy.c new file mode 100644 index 000000000..6965682ec --- /dev/null +++ b/interface/copy.c @@ -0,0 +1,80 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + BLASLONG incy = *INCY; + + PRINT_DEBUG_NAME; + +#else + +void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ + + PRINT_DEBUG_CNAME; + +#endif + + if (n <= 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx * COMPSIZE; + if (incy < 0) y -= (n - 1) * incy * COMPSIZE; + + COPY_K(n, x, incx, y, incy); + + FUNCTION_PROFILE_END(COMPSIZE, COMPSIZE * n, 0); + + IDEBUG_END; + + return; + +} diff --git a/interface/create b/interface/create new file mode 100644 index 000000000..b7be8ab6e --- /dev/null +++ b/interface/create @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +$count = 0; + +foreach (@ARGV) { + print "#define\tinterface_", $_, "\t\t", $count, "\n"; + $count ++; +} + +print "#ifdef USE_FUNCTABLE\n"; + +print "#define MAX_PROF_TABLE ", $count, "\n"; + +print "static char *func_table[] = {\n"; + +foreach (@ARGV) { + print "\"", $_, "\",\n"; +} + +print "};\n"; +print "#endif\n"; + diff --git a/interface/dot.c b/interface/dot.c new file mode 100644 index 000000000..3744db5ea --- /dev/null +++ b/interface/dot.c @@ -0,0 +1,101 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + BLASLONG incy = *INCY; + FLOATRET ret; + + PRINT_DEBUG_NAME; + + if (n <= 0) return 0.; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx; + if (incy < 0) y -= (n - 1) * incy; + + ret = (FLOATRET)DOTU_K(n, x, incx, y, incy); + + FUNCTION_PROFILE_END(1, 2 * n, 2 * n); + + IDEBUG_END; + + return ret; +} + +#else + +FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ + + FLOAT ret; + + PRINT_DEBUG_CNAME; + + if (n <= 0) return 0.; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx; + if (incy < 0) y -= (n - 1) * incy; + + ret = DOTU_K(n, x, incx, y, incy); + + FUNCTION_PROFILE_END(1, 2 * n, 2 * n); + + IDEBUG_END; + + return ret; + +} + +#endif diff --git a/interface/dsdot.c b/interface/dsdot.c new file mode 100644 index 000000000..66f7917d5 --- /dev/null +++ b/interface/dsdot.c @@ -0,0 +1,99 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + BLASLONG incy = *INCY; + + PRINT_DEBUG_NAME; + + if (n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx; + if (incy < 0) y -= (n - 1) * incy; + + return DSDOT_K(n, x, incx, y, incy); + + FUNCTION_PROFILE_END(1, n, n); + + IDEBUG_END; + + return 0; + +} + +#else + +double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ + + PRINT_DEBUG_CNAME; + + if (n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx; + if (incy < 0) y -= (n - 1) * incy; + + return DSDOT_K(n, x, incx, y, incy); + + FUNCTION_PROFILE_END(1, n, n); + + IDEBUG_END; + + return 0; + +} + +#endif diff --git a/interface/gbmv.c b/interface/gbmv.c new file mode 100644 index 000000000..a76c48d04 --- /dev/null +++ b/interface/gbmv.c @@ -0,0 +1,252 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QGBMV " +#elif defined(DOUBLE) +#define ERROR_NAME "DGBMV " +#else +#define ERROR_NAME "SGBMV " +#endif + +static void (*gbmv[])(BLASLONG, BLASLONG, BLASLONG, BLASLONG, FLOAT, + FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { +#ifdef XDOUBLE + qgbmv_n, qgbmv_t, +#elif defined(DOUBLE) + dgbmv_n, dgbmv_t, +#else + sgbmv_n, sgbmv_t, +#endif +}; + +#ifdef SMP +static int (*gbmv_thread[])(BLASLONG, BLASLONG, BLASLONG, BLASLONG, FLOAT, + FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + qgbmv_thread_n, qgbmv_thread_t, +#elif defined(DOUBLE) + dgbmv_thread_n, dgbmv_thread_t, +#else + sgbmv_thread_n, sgbmv_thread_t, +#endif +}; +#endif + +#ifndef CBLAS + +void NAME(char *TRANS, blasint *M, blasint *N, + blasint *KU, blasint *KL, + FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *x, blasint *INCX, + FLOAT *BETA, FLOAT *y, blasint *INCY){ + + char trans = *TRANS; + blasint m = *M; + blasint n = *N; + blasint ku = *KU; + blasint kl = *KL; + blasint lda = *LDA; + blasint incx = *INCX; + blasint incy = *INCY; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + FLOAT alpha = *ALPHA; + FLOAT beta = *BETA; + + blasint info; + blasint lenx, leny; + blasint i; + + PRINT_DEBUG_NAME; + + TOUPPER(trans); + + info = 0; + + i = -1; + + if (trans == 'N') i = 0; + if (trans == 'T') i = 1; + if (trans == 'R') i = 0; + if (trans == 'C') i = 1; + + if (incy == 0) info = 13; + if (incx == 0) info = 10; + if (lda < kl + ku + 1) info = 8; + if (kl < 0) info = 5; + if (ku < 0) info = 4; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (i < 0) info = 1; + + trans = i; + + if (info != 0){ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE 
TransA, + blasint m, blasint n, + blasint ku, blasint kl, + FLOAT alpha, + FLOAT *a, blasint lda, + FLOAT *x, blasint incx, + FLOAT beta, + FLOAT *y, blasint incy){ + + FLOAT *buffer; + blasint lenx, leny, info, t; + int trans; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_CNAME; + + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 0; + if (TransA == CblasConjTrans) trans = 1; + + info = -1; + + if (incy == 0) info = 13; + if (incx == 0) info = 10; + if (lda < kl + ku + 1) info = 8; + if (kl < 0) info = 5; + if (ku < 0) info = 4; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (trans < 0) info = 1; + } + + if (order == CblasRowMajor) { + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 1; + if (TransA == CblasConjTrans) trans = 0; + + info = -1; + + t = n; + n = m; + m = t; + + t = ku; + ku = kl; + kl = t; + + if (incy == 0) info = 13; + if (incx == 0) info = 10; + if (lda < kl + ku + 1) info = 8; + if (kl < 0) info = 5; + if (ku < 0) info = 4; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (trans < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if ((m==0) || (n==0)) return; + + lenx = n; + leny = m; + if (trans) lenx = m; + if (trans) leny = n; + + if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, abs(incy), NULL, 0, NULL, 0); + + if (alpha == ZERO) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (lenx-1)*incx; + if (incy < 0) y -= (leny-1)*incy; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (gbmv[(int)trans])(m, n, kl, ku, alpha, a, lda, x, incx, y, incy, buffer); + +#ifdef SMP + } else { + + (gbmv_thread[(int)trans])(m, n, kl, ku, alpha, a, lda, x, incx, y, incy, buffer, 
nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(1, m * n / 2 + n, m * n); + + IDEBUG_END; + + return; +} diff --git a/interface/gemm.c b/interface/gemm.c new file mode 100644 index 000000000..7919f822e --- /dev/null +++ b/interface/gemm.c @@ -0,0 +1,452 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. 
*/ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef COMPLEX +#ifdef XDOUBLE +#define ERROR_NAME "QGEMM " +#elif defined(DOUBLE) +#define ERROR_NAME "DGEMM " +#else +#define ERROR_NAME "SGEMM " +#endif +#else +#ifndef GEMM3M +#ifdef XDOUBLE +#define ERROR_NAME "XGEMM " +#elif defined(DOUBLE) +#define ERROR_NAME "ZGEMM " +#else +#define ERROR_NAME "CGEMM " +#endif +#else +#ifdef XDOUBLE +#define ERROR_NAME "XGEMM3M " +#elif defined(DOUBLE) +#define ERROR_NAME "ZGEMM3M " +#else +#define ERROR_NAME "CGEMM3M " +#endif +#endif +#endif + +static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { +#ifndef GEMM3M + GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN, + GEMM_NT, GEMM_TT, GEMM_RT, GEMM_CT, + GEMM_NR, GEMM_TR, GEMM_RR, GEMM_CR, + GEMM_NC, GEMM_TC, GEMM_RC, GEMM_CC, +#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3) + GEMM_THREAD_NN, GEMM_THREAD_TN, GEMM_THREAD_RN, GEMM_THREAD_CN, + GEMM_THREAD_NT, GEMM_THREAD_TT, GEMM_THREAD_RT, GEMM_THREAD_CT, + GEMM_THREAD_NR, GEMM_THREAD_TR, GEMM_THREAD_RR, GEMM_THREAD_CR, + GEMM_THREAD_NC, GEMM_THREAD_TC, GEMM_THREAD_RC, GEMM_THREAD_CC, +#endif +#else + GEMM3M_NN, GEMM3M_TN, GEMM3M_RN, GEMM3M_CN, + GEMM3M_NT, GEMM3M_TT, GEMM3M_RT, GEMM3M_CT, + GEMM3M_NR, GEMM3M_TR, GEMM3M_RR, GEMM3M_CR, + GEMM3M_NC, GEMM3M_TC, GEMM3M_RC, GEMM3M_CC, +#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3) + GEMM3M_THREAD_NN, GEMM3M_THREAD_TN, GEMM3M_THREAD_RN, GEMM3M_THREAD_CN, + GEMM3M_THREAD_NT, GEMM3M_THREAD_TT, GEMM3M_THREAD_RT, GEMM3M_THREAD_CT, + GEMM3M_THREAD_NR, GEMM3M_THREAD_TR, GEMM3M_THREAD_RR, GEMM3M_THREAD_CR, + 
GEMM3M_THREAD_NC, GEMM3M_THREAD_TC, GEMM3M_THREAD_RC, GEMM3M_THREAD_CC, +#endif +#endif +}; + +#ifndef CBLAS + +void NAME(char *TRANSA, char *TRANSB, + blasint *M, blasint *N, blasint *K, + FLOAT *alpha, + FLOAT *a, blasint *ldA, + FLOAT *b, blasint *ldB, + FLOAT *beta, + FLOAT *c, blasint *ldC){ + + blas_arg_t args; + + int transa, transb, nrowa, nrowb; + blasint info; + + char transA, transB; + FLOAT *buffer; + FLOAT *sa, *sb; + +#ifdef SMP +#ifndef COMPLEX +#ifdef XDOUBLE + int mode = BLAS_XDOUBLE | BLAS_REAL; +#elif defined(DOUBLE) + int mode = BLAS_DOUBLE | BLAS_REAL; +#else + int mode = BLAS_SINGLE | BLAS_REAL; +#endif +#else +#ifdef XDOUBLE + int mode = BLAS_XDOUBLE | BLAS_COMPLEX; +#elif defined(DOUBLE) + int mode = BLAS_DOUBLE | BLAS_COMPLEX; +#else + int mode = BLAS_SINGLE | BLAS_COMPLEX; +#endif +#endif +#endif + +#if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3) + int nodes; +#endif + + PRINT_DEBUG_NAME; + + args.m = *M; + args.n = *N; + args.k = *K; + + args.a = (void *)a; + args.b = (void *)b; + args.c = (void *)c; + + args.lda = *ldA; + args.ldb = *ldB; + args.ldc = *ldC; + + args.alpha = (void *)alpha; + args.beta = (void *)beta; + + transA = *TRANSA; + transB = *TRANSB; + + TOUPPER(transA); + TOUPPER(transB); + + transa = -1; + transb = -1; + + if (transA == 'N') transa = 0; + if (transA == 'T') transa = 1; +#ifndef COMPLEX + if (transA == 'R') transa = 0; + if (transA == 'C') transa = 1; +#else + if (transA == 'R') transa = 2; + if (transA == 'C') transa = 3; +#endif + + if (transB == 'N') transb = 0; + if (transB == 'T') transb = 1; +#ifndef COMPLEX + if (transB == 'R') transb = 0; + if (transB == 'C') transb = 1; +#else + if (transB == 'R') transb = 2; + if (transB == 'C') transb = 3; +#endif + + nrowa = args.m; + if (transa & 1) nrowa = args.k; + nrowb = args.k; + if (transb & 1) nrowb = args.n; + + info = 0; + + if (args.ldc < args.m) info = 13; + if (args.ldb < nrowb) info = 10; + if (args.lda < nrowa) info = 
8; + if (args.k < 0) info = 5; + if (args.n < 0) info = 4; + if (args.m < 0) info = 3; + if (transb < 0) info = 2; + if (transa < 0) info = 1; + + if (info){ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, + blasint m, blasint n, blasint k, +#ifndef COMPLEX + FLOAT alpha, +#else + FLOAT *alpha, +#endif + FLOAT *a, blasint lda, + FLOAT *b, blasint ldb, +#ifndef COMPLEX + FLOAT beta, +#else + FLOAT *beta, +#endif + FLOAT *c, blasint ldc) { + + blas_arg_t args; + int transa, transb; + blasint nrowa, nrowb, info; + + XFLOAT *buffer; + XFLOAT *sa, *sb; + +#ifdef SMP +#ifndef COMPLEX +#ifdef XDOUBLE + int mode = BLAS_XDOUBLE | BLAS_REAL; +#elif defined(DOUBLE) + int mode = BLAS_DOUBLE | BLAS_REAL; +#else + int mode = BLAS_SINGLE | BLAS_REAL; +#endif +#else +#ifdef XDOUBLE + int mode = BLAS_XDOUBLE | BLAS_COMPLEX; +#elif defined(DOUBLE) + int mode = BLAS_DOUBLE | BLAS_COMPLEX; +#else + int mode = BLAS_SINGLE | BLAS_COMPLEX; +#endif +#endif +#endif + +#if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3) + int nodes; +#endif + + PRINT_DEBUG_CNAME; + +#ifndef COMPLEX + args.alpha = (void *)α + args.beta = (void *)β +#else + args.alpha = (void *)alpha; + args.beta = (void *)beta; +#endif + + transa = -1; + transb = -1; + info = 0; + + if (order == CblasColMajor) { + args.m = m; + args.n = n; + args.k = k; + + args.a = (void *)a; + args.b = (void *)b; + args.c = (void *)c; + + args.lda = lda; + args.ldb = ldb; + args.ldc = ldc; + + if (TransA == CblasNoTrans) transa = 0; + if (TransA == CblasTrans) transa = 1; +#ifndef COMPLEX + if (TransA == CblasConjNoTrans) transa = 0; + if (TransA == CblasConjTrans) transa = 1; +#else + if (TransA == CblasConjNoTrans) transa = 2; + if (TransA == CblasConjTrans) transa = 3; +#endif + if (TransB == CblasNoTrans) transb = 0; + if (TransB == CblasTrans) transb = 1; +#ifndef COMPLEX + if 
(TransB == CblasConjNoTrans) transb = 0; + if (TransB == CblasConjTrans) transb = 1; +#else + if (TransB == CblasConjNoTrans) transb = 2; + if (TransB == CblasConjTrans) transb = 3; +#endif + + nrowa = args.m; + if (transa & 1) nrowa = args.k; + nrowb = args.k; + if (transb & 1) nrowb = args.n; + + info = -1; + + if (args.ldc < args.m) info = 13; + if (args.ldb < nrowb) info = 10; + if (args.lda < nrowa) info = 8; + if (args.k < 0) info = 5; + if (args.n < 0) info = 4; + if (args.m < 0) info = 3; + if (transb < 0) info = 2; + if (transa < 0) info = 1; + } + + if (order == CblasRowMajor) { + args.m = n; + args.n = m; + args.k = k; + + args.a = (void *)b; + args.b = (void *)a; + args.c = (void *)c; + + args.lda = ldb; + args.ldb = lda; + args.ldc = ldc; + + if (TransB == CblasNoTrans) transa = 0; + if (TransB == CblasTrans) transa = 1; +#ifndef COMPLEX + if (TransB == CblasConjNoTrans) transa = 0; + if (TransB == CblasConjTrans) transa = 1; +#else + if (TransB == CblasConjNoTrans) transa = 2; + if (TransB == CblasConjTrans) transa = 3; +#endif + if (TransA == CblasNoTrans) transb = 0; + if (TransA == CblasTrans) transb = 1; +#ifndef COMPLEX + if (TransA == CblasConjNoTrans) transb = 0; + if (TransA == CblasConjTrans) transb = 1; +#else + if (TransA == CblasConjNoTrans) transb = 2; + if (TransA == CblasConjTrans) transb = 3; +#endif + + nrowa = args.m; + if (transa & 1) nrowa = args.k; + nrowb = args.k; + if (transb & 1) nrowb = args.n; + + info = -1; + + if (args.ldc < args.m) info = 13; + if (args.ldb < nrowb) info = 10; + if (args.lda < nrowa) info = 8; + if (args.k < 0) info = 5; + if (args.n < 0) info = 4; + if (args.m < 0) info = 3; + if (transb < 0) info = 2; + if (transa < 0) info = 1; + + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if ((args.m == 0) || (args.n == 0)) return; + +#if 0 + fprintf(stderr, "m = %4d n = %d k = %d lda = %4d ldb = %4d ldc = %4d\n", + args.m, args.n, args.k, args.lda, 
args.ldb, args.ldc); +#endif + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + buffer = (XFLOAT *)blas_memory_alloc(0); + + sa = (XFLOAT *)((BLASLONG)buffer +GEMM_OFFSET_A); + sb = (XFLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + +#ifdef SMP + mode |= (transa << BLAS_TRANSA_SHIFT); + mode |= (transb << BLAS_TRANSB_SHIFT); + + args.common = NULL; + args.nthreads = num_cpu_avail(3); + + if (args.nthreads == 1) { +#endif + + (gemm[(transb << 2) | transa])(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + + } else { + +#ifndef USE_SIMPLE_THREADED_LEVEL3 + +#ifndef NO_AFFINITY + nodes = get_num_nodes(); + + if ((nodes > 1) && get_node_equal()) { + + args.nthreads /= nodes; + + gemm_thread_mn(mode, &args, NULL, NULL, gemm[16 | (transb << 2) | transa], sa, sb, nodes); + + } else { +#endif + + (gemm[16 | (transb << 2) | transa])(&args, NULL, NULL, sa, sb, 0); + +#else + + GEMM_THREAD(mode, &args, NULL, NULL, gemm[(transb << 2) | transa], sa, sb, args.nthreads); + +#endif + +#ifndef USE_SIMPLE_THREADED_LEVEL3 +#ifndef NO_AFFINITY + } +#endif +#endif + +#endif + +#ifdef SMP + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.k + args.k * args.n + args.m * args.n, 2 * args.m * args.n * args.k); + + IDEBUG_END; + + return; +} diff --git a/interface/gemv.c b/interface/gemv.c new file mode 100644 index 000000000..9ea8aa895 --- /dev/null +++ b/interface/gemv.c @@ -0,0 +1,237 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. 
*/ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + /* GEMV interface layer: validates arguments, scales y by beta and dispatches to the GEMV_N / GEMV_T kernels (S/D/Q precision selected at compile time). */ +#include <stdio.h> +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QGEMV " +#elif defined(DOUBLE) +#define ERROR_NAME "DGEMV " +#else +#define ERROR_NAME "SGEMV " +#endif + /* Threaded drivers, indexed by trans (0 = not transposed, 1 = transposed). */ +#ifdef SMP +static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + qgemv_thread_n, qgemv_thread_t, +#elif defined DOUBLE + dgemv_thread_n, dgemv_thread_t, +#else + sgemv_thread_n, sgemv_thread_t, +#endif +}; +#endif + +#ifndef CBLAS + /* Fortran-style entry point: every argument is passed by pointer. On an invalid argument xerbla is called with its 1-based position and the routine returns without touching y. */ +void NAME(char *TRANS, blasint *M, blasint *N, + FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *x, blasint *INCX, + FLOAT *BETA, FLOAT *y, blasint *INCY){ + + char trans = *TRANS; + blasint m = *M; + blasint n = *N; + blasint lda = *LDA; + blasint incx = *INCX; + blasint incy = *INCY; + FLOAT alpha = *ALPHA; + FLOAT beta = *BETA; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + /* Single-threaded kernels, indexed by trans like gemv_thread[]. */ + int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { + GEMV_N, GEMV_T, + }; + + blasint info; + blasint lenx, leny; + blasint i; + + PRINT_DEBUG_NAME; + + TOUPPER(trans); + + info = 0; + + i = -1; + /* 'R' is handled like 'N' and 'C' like 'T'; i stays -1 for anything else. */ + if (trans == 'N') i = 0; + if (trans == 'T') i = 1; + if (trans == 'R') i = 0; + if (trans == 'C') i = 1; + /* Checks run from the last argument to the first, so the lowest-numbered bad argument is the one reported. */ + if (incy == 0) info = 11; + if (incx == 0) info = 8; + if (lda < MAX(1, m)) info = 6; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (i < 0) info = 1; + /* From here on trans holds the table index (0 or 1), not a character. */ + trans = i; + + if (info != 0){ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + /* CBLAS entry point: arguments are passed by value and the storage order is explicit. */ +void CNAME(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, + blasint m, blasint n, + FLOAT alpha, + FLOAT *a, blasint lda, + FLOAT *x, blasint incx, + FLOAT beta, + FLOAT *y, blasint incy){ + + FLOAT *buffer; + blasint lenx, leny; + int trans; + blasint info, t; +#ifdef SMP + int nthreads; +#endif + + int
(*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { + GEMV_N, GEMV_T, + }; + + PRINT_DEBUG_CNAME; + + trans = -1; + info = 0; + /* In this path info == -1 means "no error". info stays 0 when order is neither CblasColMajor nor CblasRowMajor, and 0 is reported to xerbla too. */ + if (order == CblasColMajor) { + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 0; + if (TransA == CblasConjTrans) trans = 1; + + info = -1; + + if (incy == 0) info = 11; + if (incx == 0) info = 8; + if (lda < MAX(1, m)) info = 6; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (trans < 0) info = 1; + + } + /* Row-major input is mapped onto the column-major kernels by inverting the transpose flag and swapping m and n. */ + if (order == CblasRowMajor) { + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 1; + if (TransA == CblasConjTrans) trans = 0; + + info = -1; + + t = n; + n = m; + m = t; + + if (incy == 0) info = 11; + if (incx == 0) info = 8; + if (lda < MAX(1, m)) info = 6; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (trans < 0) info = 1; + + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + /* Shared tail of both entry points. */ + if ((m==0) || (n==0)) return; + /* x has n elements and y has m, or the reverse when trans is set. */ + lenx = n; + leny = m; + if (trans) lenx = m; + if (trans) leny = n; + /* y := beta * y. The stride is made non-negative without abs(): abs() takes an int and would truncate incy if blasint is wider than int. */ + if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, (incy < 0 ? -incy : incy), NULL, 0, NULL, 0); + + if (alpha == ZERO) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + /* For a negative increment, advance the base pointer by (len - 1) * |inc| elements. */ + if (incx < 0) x -= (lenx - 1) * incx; + if (incy < 0) y -= (leny - 1) * incy; + /* Scratch buffer handed to the kernel; released below. */ + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (gemv[(int)trans])(m, n, 0, alpha, a, lda, x, incx, y, incy, buffer); + +#ifdef SMP + } else { + + (gemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, y, incy, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); + + IDEBUG_END; + + return; + +} diff --git a/interface/ger.c b/interface/ger.c new file mode 100644 index 000000000..0218d94dd --- /dev/null +++
b/interface/ger.c @@ -0,0 +1,193 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QGER " +#elif defined DOUBLE +#define ERROR_NAME "DGER " +#else +#define ERROR_NAME "SGER " +#endif + +#define GER GERU_K + +#if defined XDOUBLE +#define GER_THREAD qger_thread +#elif defined DOUBLE +#define GER_THREAD dger_thread +#else +#define GER_THREAD sger_thread +#endif + + +#ifndef CBLAS + +void NAME(blasint *M, blasint *N, FLOAT *Alpha, + FLOAT *x, blasint *INCX, + FLOAT *y, blasint *INCY, + FLOAT *a, blasint *LDA){ + + blasint m = *M; + blasint n = *N; + FLOAT alpha = *Alpha; + blasint incx = *INCX; + blasint incy = *INCY; + blasint lda = *LDA; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + blasint info; + + PRINT_DEBUG_NAME; + + info = 0; + + if (lda < MAX(1,m)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + + if (info){ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, + blasint m, blasint n, + FLOAT alpha, + FLOAT *x, blasint incx, + FLOAT *y, blasint incy, + FLOAT *a, blasint lda) { + + FLOAT *buffer; + blasint info, t; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_CNAME; + + info = 0; + + if (order == CblasColMajor) { + info = -1; + + if (lda < MAX(1,m)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + } + + if (order == CblasRowMajor) { + info = -1; + + t = n; + n = m; + m = t; + + t = incx; + incx = incy; + incy = t; + + buffer = x; + x = y; + y = buffer; + + if (lda < MAX(1,m)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + /* Quick return if possible. 
*/ + if (m == 0 || n == 0) return; + if (alpha == 0.) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incy < 0) y -= (n - 1) * incy; + if (incx < 0) x -= (m - 1) * incx; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); + +#ifdef SMP + } else { + + GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); + + IDEBUG_END; + + return; +} diff --git a/interface/gesv.c b/interface/gesv.c new file mode 100644 index 000000000..ce6bcbd0b --- /dev/null +++ b/interface/gesv.c @@ -0,0 +1,154 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + /* GESV interface layer: validates arguments, then runs GETRF followed by GETRS (no transpose) on the same blas_arg_t. */ +#include <stdio.h> +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef COMPLEX +#ifdef XDOUBLE +#define ERROR_NAME "QGESV " +#elif defined(DOUBLE) +#define ERROR_NAME "DGESV " +#else +#define ERROR_NAME "SGESV " +#endif +#else +#ifdef XDOUBLE +#define ERROR_NAME "XGESV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZGESV " +#else +#define ERROR_NAME "CGESV " +#endif +#endif + /* Fortran-style entry point. Always returns 0; the outcome is stored in *Info: 0 on success, -k when argument k is invalid, otherwise the value returned by GETRF. */ +int NAME(blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, blasint *ipiv, + FLOAT *b, blasint *ldB, blasint *Info){ + + blas_arg_t args; + + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + /* Pack the arguments; the pivot array travels in args.c. */ + args.m = *N; + args.n = *NRHS; + args.a = (void *)a; + args.lda = *ldA; + args.b = (void *)b; + args.ldb = *ldB; + args.c = (void *)ipiv; + /* Checks run from the last argument to the first, so the lowest-numbered bad argument is the one reported. */ + info = 0; + if (args.ldb < MAX(1,args.m)) info = 7; + if (args.lda < MAX(1,args.m)) info = 4; + if (args.n < 0) info = 2; + if (args.m < 0) info = 1; + + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + +
args.alpha = NULL; + args.beta = NULL; + + *Info = 0; + /* Quick return: nothing to factor or no right-hand sides. */ + if (args.m == 0 || args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + /* Unless PPC440 is defined (sa and sb are extern there), both work areas are carved out of one allocation: sa at byte offset GEMM_OFFSET_A, sb after a GEMM_P * GEMM_Q panel masked with ~GEMM_ALIGN, plus GEMM_OFFSET_B. */ +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + /* args.n is N for the factorization and NRHS for the solve, which only runs when GETRF returned 0. */ + args.n = *N; + info = GETRF_SINGLE(&args, NULL, NULL, sa, sb, 0); + + if (info == 0){ + args.n = *NRHS; + GETRS_N_SINGLE(&args, NULL, NULL, sa, sb, 0); + } + +#ifdef SMP + } else { + /* Same sequence using the parallel drivers. */ + args.n = *N; + info = GETRF_PARALLEL(&args, NULL, NULL, sa, sb, 0); + + if (info == 0){ + args.n = *NRHS; + GETRS_N_PARALLEL(&args, NULL, NULL, sa, sb, 0); + } + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + *Info = info; + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, *N * *N, 2. / 3. * *N * *N * *N + *N * *N); + + IDEBUG_END; + + return 0; +} diff --git a/interface/getf2.c b/interface/getf2.c new file mode 100644 index 000000000..cae15953b --- /dev/null +++ b/interface/getf2.c @@ -0,0 +1,109 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution.
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + /* GETF2 interface layer: validates arguments and calls the GETF2 driver. */ +#include <stdio.h> +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QGETF2" +#elif defined(DOUBLE) +#define ERROR_NAME "DGETF2" +#else +#define ERROR_NAME "SGETF2" +#endif + /* Fortran-style entry point. Always returns 0; the outcome is stored in *Info: 0 on quick return, -k when argument k is invalid, otherwise the value returned by GETF2. */ +int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint *Info){ + + blas_arg_t args; + + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + /* Pack the arguments; the pivot array travels in args.c. */ + args.m = *M; + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + args.c = (void *)ipiv; + /* Checks run from the last argument to the first, so the lowest-numbered bad argument is the one reported. */ + info = 0; + if (args.lda < MAX(1,args.m)) info = 4; + if (args.n < 0) info = 2; + if (args.m < 0) info = 1; + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + if (args.m == 0 || args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + /* Unless PPC440 is defined (sa and sb are extern there), both work areas are carved out of one allocation: sa at byte offset GEMM_OFFSET_A, sb after a GEMM_P * GEMM_Q panel masked with ~GEMM_ALIGN, plus GEMM_OFFSET_B. */ +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + + info = GETF2(&args, NULL, NULL, sa, sb, 0); + + *Info = info; + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n); + + IDEBUG_END; + + return 0; +} diff --git a/interface/getrf.c b/interface/getrf.c new file mode 100644 index 000000000..aa799e8d3 --- /dev/null +++ b/interface/getrf.c @@ -0,0 +1,121 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1.
Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + /* GETRF interface layer: validates arguments and calls the single-threaded or parallel GETRF driver. */ +#include <stdio.h> +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QGETRF" +#elif defined(DOUBLE) +#define ERROR_NAME "DGETRF" +#else +#define ERROR_NAME "SGETRF" +#endif + /* Fortran-style entry point. Always returns 0; the outcome is stored in *Info: 0 on quick return, -k when argument k is invalid, otherwise the value returned by the GETRF driver. */ +int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint *Info){ + + blas_arg_t args; + + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + /* Pack the arguments; the pivot array travels in args.c. */ + args.m = *M; + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + args.c = (void *)ipiv; + /* Checks run from the last argument to the first, so the lowest-numbered bad argument is the one reported. */ + info = 0; + if (args.lda < MAX(1,args.m)) info = 4; + if (args.n < 0) info = 2; + if (args.m < 0) info = 1; + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + if (args.m == 0 || args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + /* Unless PPC440 is defined (sa and sb are extern there), both work areas are carved out of one allocation: sa at byte offset GEMM_OFFSET_A, sb after a GEMM_P * GEMM_Q panel masked with ~GEMM_ALIGN, plus GEMM_OFFSET_B. */ +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + + *Info = GETRF_SINGLE(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + } else { + + *Info = GETRF_PARALLEL(&args, NULL, NULL, sa, sb, 0); + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n); + + IDEBUG_END; + + return 0; +} diff --git a/interface/getrs.c b/interface/getrs.c new file mode 100644 index 000000000..761a00160 --- /dev/null +++ b/interface/getrs.c @@ -0,0 +1,152 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved.
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QGETRS" +#elif defined(DOUBLE) +#define ERROR_NAME "DGETRS" +#else +#define ERROR_NAME "SGETRS" +#endif + +static blasint (*getrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + GETRS_N_SINGLE, GETRS_T_SINGLE, +}; + +#ifdef SMP +static blasint (*getrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + GETRS_N_PARALLEL, GETRS_T_PARALLEL, +}; +#endif + +int NAME(char *TRANS, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, + blasint *ipiv, FLOAT *b, blasint *ldB, blasint *Info){ + + char trans_arg = *TRANS; + + blas_arg_t args; + + blasint info; + int trans; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.m = *N; + args.n = *NRHS; + args.a = (void *)a; + args.lda = *ldA; + args.b = (void *)b; + args.ldb = *ldB; + args.c = (void *)ipiv; + + info = 0; + + TOUPPER(trans_arg); + trans = -1; + + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 0; + if (trans_arg == 'C') trans = 1; + + if (args.ldb < MAX(1, args.m)) info = 8; + if (args.lda < MAX(1, args.m)) info = 5; + if (args.n < 0) info = 3; + if (args.m < 0) info = 2; + if (trans < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return 0; + } + + args.alpha = NULL; + args.beta = NULL; + + *Info = info; + + if (args.m == 0 || args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + 
if (args.nthreads == 1) { +#endif + + (getrs_single[trans])(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + } else { + (getrs_parallel[trans])(&args, NULL, NULL, sa, sb, 0); + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n); + + IDEBUG_END; + + return 0; + +} diff --git a/interface/imax.c b/interface/imax.c new file mode 100644 index 000000000..37396c7f8 --- /dev/null +++ b/interface/imax.c @@ -0,0 +1,171 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + /* Index-of-extremum interface layer: one source file builds every I?MAX / I?MIN / I?AMAX / I?AMIN variant. */ +#include <stdio.h> +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + /* MAX_K is redefined below to the kernel chosen by USE_ABS, USE_MIN, COMPLEX and the precision macros. */ +#undef MAX_K + +#ifdef USE_ABS + +#ifndef USE_MIN + +/* ABS & MAX */ +#ifndef COMPLEX +#ifdef XDOUBLE +#define MAX_K IQAMAX_K +#elif defined(DOUBLE) +#define MAX_K IDAMAX_K +#else +#define MAX_K ISAMAX_K +#endif +#else +#ifdef XDOUBLE +#define MAX_K IXAMAX_K +#elif defined(DOUBLE) +#define MAX_K IZAMAX_K +#else +#define MAX_K ICAMAX_K +#endif +#endif + +#else + +/* ABS & MIN */ +#ifndef COMPLEX +#ifdef XDOUBLE +#define MAX_K IQAMIN_K +#elif defined(DOUBLE) +#define MAX_K IDAMIN_K +#else +#define MAX_K ISAMIN_K +#endif +#else +#ifdef XDOUBLE +#define MAX_K IXAMIN_K +#elif defined(DOUBLE) +#define MAX_K IZAMIN_K +#else +#define MAX_K ICAMIN_K +#endif +#endif + +#endif + +#else + /* Without USE_ABS only the real-precision kernels are selected; COMPLEX is not consulted in this branch. */ +#ifndef USE_MIN + +/* MAX */ +#ifdef XDOUBLE +#define MAX_K IQMAX_K +#elif defined(DOUBLE) +#define MAX_K IDMAX_K +#else +#define MAX_K ISMAX_K +#endif + +#else + +/* MIN */ +#ifdef XDOUBLE +#define MAX_K IQMIN_K +#elif defined(DOUBLE) +#define MAX_K IDMIN_K +#else +#define MAX_K ISMIN_K
+#endif + +#endif + +#endif + +#ifndef CBLAS + /* Fortran-style entry point: returns the kernel's result unchanged, or 0 when n <= 0. */ +blasint NAME(blasint *N, FLOAT *x, blasint *INCX){ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + blasint ret; + + PRINT_DEBUG_NAME; + + if (n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + ret = (blasint)MAX_K(n, x, incx); + + FUNCTION_PROFILE_END(COMPSIZE, n, 0); + + IDEBUG_END; + + return ret; +} + +#else + /* CBLAS entry point: same kernel, but a non-zero result is decremented by one before it is returned. */ +CBLAS_INDEX CNAME(blasint n, FLOAT *x, blasint incx){ + + CBLAS_INDEX ret; + + PRINT_DEBUG_CNAME; + + if (n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + ret = MAX_K(n, x, incx); + /* presumably converts the kernel's 1-based index to CBLAS's 0-based one; a kernel result of 0 is passed through. */ + if (ret) ret --; + + FUNCTION_PROFILE_END(COMPSIZE, n, 0); + + IDEBUG_END; + + return ret; +} + +#endif diff --git a/interface/larf.c b/interface/larf.c new file mode 100644 index 000000000..3b538c4a2 --- /dev/null +++ b/interface/larf.c @@ -0,0 +1,109 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + /* LARF interface layer: packs the arguments and calls LARF_L or LARF_R according to SIDE. */ +#include <stdio.h> +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + /* Driver table, indexed by side (0 = 'L', 1 = 'R'). */ +static int (*larf[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + LARF_L, LARF_R, +}; + /* Fortran-style entry point. Always returns 0 and performs no xerbla reporting; the work argument is not referenced in this wrapper. */ +int NAME(char *SIDE, blasint *M, blasint *N, FLOAT *v, blasint *incV, FLOAT *tau, FLOAT *c, blasint *ldC, FLOAT *work){ + + blas_arg_t args; + + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + char side_arg = *SIDE; + int side; + + PRINT_DEBUG_NAME; + + TOUPPER(side_arg); + /* v and its increment travel in args.a / args.lda, tau in args.alpha. */ + args.m = *M; + args.n = *N; + args.a = (void *)v; + args.lda = *incV; + args.c = (void *)c; + args.ldc = *ldC; + + args.alpha = (void *)tau; + + side = -1; + if (side_arg == 'L') side = 0; + if (side_arg == 'R') side = 1; + if (side < 0) return 0; /* unrecognised SIDE: return instead of indexing larf[] out of bounds */ + if (args.m == 0 || args.n == 0) return 0; + /* Quick return when tau is zero (both parts for COMPLEX). */ +#ifndef COMPLEX + if (*tau == ZERO) return 0; +#else + if ((*(tau + 0) == ZERO) && (*(tau + 1) == ZERO)) return 0; +#endif + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + /* Unless PPC440 is defined (sa and sb are extern there), both work areas are carved out of one allocation: sa at byte offset GEMM_OFFSET_A, sb after a GEMM_P * GEMM_Q panel masked with ~GEMM_ALIGN, plus GEMM_OFFSET_B. */ +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb =
(FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + + larf[side](&args, NULL, NULL, sa, sb, 0); + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n); + + IDEBUG_END; + + return 0; +} diff --git a/interface/laswp.c b/interface/laswp.c new file mode 100644 index 000000000..026b5156f --- /dev/null +++ b/interface/laswp.c @@ -0,0 +1,110 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +static int (*laswp[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, blasint *, BLASLONG) = { +#ifdef XDOUBLE + qlaswp_plus, qlaswp_minus, +#elif defined(DOUBLE) + dlaswp_plus, dlaswp_minus, +#else + slaswp_plus, slaswp_minus, +#endif +}; + +int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *ipiv, blasint *INCX){ + + blasint n = *N; + blasint lda = *LDA; + blasint k1 = *K1; + blasint k2 = *K2; + blasint incx = *INCX; + int flag; + +#ifdef SMP + int mode, nthreads; + FLOAT dummyalpha[2] = {ZERO, ZERO}; +#endif + + PRINT_DEBUG_NAME; + + if (incx == 0 || n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + flag = (incx < 0); + +#ifdef SMP + nthreads = num_cpu_avail(1); + + if (nthreads == 1) { +#endif + + (laswp[flag])(n, k1, k2, ZERO, a, lda, NULL, 0, ipiv, incx); + +#ifdef SMP + } else { + +#ifdef XDOUBLE + mode = BLAS_XDOUBLE | BLAS_REAL; +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | 
BLAS_REAL; +#else + mode = BLAS_SINGLE | BLAS_REAL; +#endif + + blas_level1_thread(mode, n, k1, k2, dummyalpha, + a, lda, NULL, 0, ipiv, incx, + laswp[flag], nthreads); + } +#endif + + FUNCTION_PROFILE_END(COMPSIZE, n * (k2 - k1), 0); + + IDEBUG_END; + + return 0; + +} diff --git a/interface/lauu2.c b/interface/lauu2.c new file mode 100644 index 000000000..14417e986 --- /dev/null +++ b/interface/lauu2.c @@ -0,0 +1,128 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QLAUU2" +#elif defined(DOUBLE) +#define ERROR_NAME "DLAUU2" +#else +#define ERROR_NAME "SLAUU2" +#endif + +static blasint (*lauu2[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { +#ifdef XDOUBLE + qlauu2_U, qlauu2_L, +#elif defined(DOUBLE) + dlauu2_U, dlauu2_L, +#else + slauu2_U, slauu2_L, +#endif + }; + +int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ + + blas_arg_t args; + + blasint uplo_arg = *UPLO; + blasint uplo; + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + + TOUPPER(uplo_arg); + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + if (args.lda < MAX(1,args.n)) info = 4; + if (args.n < 0) info = 2; + if (uplo < 0) info = 1; + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + 
return 0; + } + + *Info = 0; + + if (args.n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + + info = (lauu2[uplo])(&args, NULL, NULL, sa, sb, 0); + + *Info = info; + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(1, .5 * args.n * args.n, + args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.)) + + 1./6. * args.n * (args.n * args.n - 1)); + + IDEBUG_END; + + return 0; +} diff --git a/interface/lauum.c b/interface/lauum.c new file mode 100644 index 000000000..e5b593f30 --- /dev/null +++ b/interface/lauum.c @@ -0,0 +1,139 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QLAUUM" +#elif defined(DOUBLE) +#define ERROR_NAME "DLAUUM" +#else +#define ERROR_NAME "SLAUUM" +#endif + +static blasint (*lauum_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + LAUUM_U_SINGLE, LAUUM_L_SINGLE, +}; + +#ifdef SMP +static blasint (*lauum_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + LAUUM_U_PARALLEL, LAUUM_L_PARALLEL, +}; +#endif + +int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ + + blas_arg_t args; + + blasint uplo_arg = *UPLO; + blasint uplo; + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + + TOUPPER(uplo_arg); + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + if (args.lda < MAX(1,args.n)) info = 4; + if (args.n < 0) info = 2; + if (uplo < 0) info = 1; + 
if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + + if (args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + + *Info = (lauum_single[uplo])(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + } else { + *Info = (lauum_parallel[uplo])(&args, NULL, NULL, sa, sb, 0); + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(1, .5 * args.n * args.n, + args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.)) + + 1./6. * args.n * (args.n * args.n - 1)); + + IDEBUG_END; + + return 0; +} diff --git a/interface/max.c b/interface/max.c new file mode 100644 index 000000000..9bedaddd0 --- /dev/null +++ b/interface/max.c @@ -0,0 +1,169 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#undef MAX_K + +#ifdef USE_ABS + +#ifndef USE_MIN + +/* ABS & MAX */ +#ifndef COMPLEX +#ifdef XDOUBLE +#define MAX_K QAMAX_K +#elif defined(DOUBLE) +#define MAX_K DAMAX_K +#else +#define MAX_K SAMAX_K +#endif +#else +#ifdef XDOUBLE +#define MAX_K XAMAX_K +#elif defined(DOUBLE) +#define MAX_K ZAMAX_K +#else +#define MAX_K CAMAX_K +#endif +#endif + +#else + +/* ABS & MIN */ +#ifndef COMPLEX +#ifdef XDOUBLE +#define MAX_K QAMIN_K +#elif defined(DOUBLE) +#define MAX_K DAMIN_K +#else +#define MAX_K SAMIN_K +#endif +#else +#ifdef XDOUBLE +#define MAX_K XAMIN_K +#elif defined(DOUBLE) +#define MAX_K ZAMIN_K +#else +#define MAX_K CAMIN_K +#endif +#endif + +#endif + +#else + +#ifndef USE_MIN + +/* MAX */ +#ifdef XDOUBLE +#define MAX_K QMAX_K +#elif defined(DOUBLE) +#define MAX_K DMAX_K +#else +#define MAX_K SMAX_K +#endif + +#else + +/* MIN */ +#ifdef XDOUBLE +#define MAX_K QMIN_K +#elif defined(DOUBLE) +#define MAX_K DMIN_K +#else +#define MAX_K SMIN_K +#endif + +#endif + +#endif + +#ifndef CBLAS + +FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + FLOATRET ret; + + PRINT_DEBUG_NAME; + + if (n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + ret = (FLOATRET)MAX_K(n, x, incx); + + FUNCTION_PROFILE_END(COMPSIZE, n, 0); + + IDEBUG_END; + + return ret; +} + +#else + +FLOAT CNAME(blasint n, FLOAT *x, blasint incx){ + + FLOAT ret; + + PRINT_DEBUG_CNAME; + + if (n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + ret = MAX_K(n, x, incx); + + FUNCTION_PROFILE_END(COMPSIZE, n, 0); + + IDEBUG_END; + + return ret; +} + +#endif diff --git a/interface/nrm2.c b/interface/nrm2.c new file mode 100644 index 000000000..ff8ef6d0d --- /dev/null +++ b/interface/nrm2.c @@ -0,0 +1,93 @@ 
+/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + FLOATRET ret; + + PRINT_DEBUG_NAME; + + if (n <= 0) return 0.; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + ret = (FLOATRET)NRM2_K(n, x, incx); + + FUNCTION_PROFILE_END(COMPSIZE, n, 2 * n); + + IDEBUG_END; + + return ret; +} + +#else + +FLOAT CNAME(blasint n, FLOAT *x, blasint incx){ + + FLOAT ret; + + PRINT_DEBUG_CNAME; + + if (n <= 0) return 0.; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + ret = NRM2_K(n, x, incx); + + FUNCTION_PROFILE_END(COMPSIZE, n, 2 * n); + + IDEBUG_END; + + return ret; +} + +#endif diff --git a/interface/potf2.c b/interface/potf2.c new file mode 100644 index 000000000..76822a49c --- /dev/null +++ b/interface/potf2.c @@ -0,0 +1,128 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QPOTF2" +#elif defined(DOUBLE) +#define ERROR_NAME "DPOTF2" +#else +#define ERROR_NAME "SPOTF2" +#endif + +static blasint (*potf2[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { +#ifdef XDOUBLE + qpotf2_U, qpotf2_L, +#elif defined(DOUBLE) + dpotf2_U, dpotf2_L, +#else + spotf2_U, spotf2_L, +#endif + }; + +int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ + + blas_arg_t args; + + blasint uplo_arg = *UPLO; + blasint uplo; + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + + TOUPPER(uplo_arg); + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + if (args.lda < MAX(1,args.n)) info = 4; + if (args.n < 0) info = 2; + if (uplo < 0) info = 1; + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + 
return 0; + } + + *Info = 0; + + if (args.n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + + info = (potf2[uplo])(&args, NULL, NULL, sa, sb, 0); + + *Info = info; + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(1, .5 * args.n * args.n, + args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.)) + + 1./6. * args.n * (args.n * args.n - 1)); + + IDEBUG_END; + + return 0; +} diff --git a/interface/potrf.c b/interface/potrf.c new file mode 100644 index 000000000..9a15012d3 --- /dev/null +++ b/interface/potrf.c @@ -0,0 +1,139 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QPOTRF" +#elif defined(DOUBLE) +#define ERROR_NAME "DPOTRF" +#else +#define ERROR_NAME "SPOTRF" +#endif + +static blasint (*potrf_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + POTRF_U_SINGLE, POTRF_L_SINGLE, +}; + +#ifdef SMP +static blasint (*potrf_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { + POTRF_U_PARALLEL, POTRF_L_PARALLEL, +}; +#endif + +int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ + + blas_arg_t args; + + blasint uplo_arg = *UPLO; + blasint uplo; + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + + TOUPPER(uplo_arg); + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + if (args.lda < MAX(1,args.n)) info = 4; + if (args.n < 0) info = 2; + if (uplo < 0) info = 1; + 
if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + + if (args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + + *Info = (potrf_single[uplo])(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + } else { + *Info = (potrf_parallel[uplo])(&args, NULL, NULL, sa, sb, 0); + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(1, .5 * args.n * args.n, + args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.)) + + 1./6. * args.n * (args.n * args.n - 1)); + + IDEBUG_END; + + return 0; +} diff --git a/interface/potri.c b/interface/potri.c new file mode 100644 index 000000000..a4f33221a --- /dev/null +++ b/interface/potri.c @@ -0,0 +1,160 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QPOTRI" +#elif defined(DOUBLE) +#define ERROR_NAME "DPOTRI" +#else +#define ERROR_NAME "SPOTRI" +#endif + +static blasint (*trtri_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + TRTRI_UN_SINGLE, TRTRI_LN_SINGLE, +}; + +static blasint (*lauum_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + LAUUM_U_SINGLE, LAUUM_L_SINGLE, +}; + +#ifdef SMP +static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + TRTRI_UN_PARALLEL, TRTRI_LN_PARALLEL, +}; + +static blasint (*lauum_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + LAUUM_U_PARALLEL, LAUUM_L_PARALLEL, +}; +#endif + +int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ + + blas_arg_t args; + + blasint uplo_arg = *UPLO; + blasint uplo; + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + + TOUPPER(uplo_arg); + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + if (args.lda < MAX(1,args.n)) info = 4; + if (args.n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + + if (args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + + info = 
(trtri_single[uplo])(&args, NULL, NULL, sa, sb, 0); + + if (!info) { + info = (lauum_single[uplo])(&args, NULL, NULL, sa, sb, 0); + } + + *Info = info; + +#ifdef SMP + } else { + info = (trtri_parallel[uplo])(&args, NULL, NULL, sa, sb, 0); + + if (!info) { + info = (lauum_parallel[uplo])(&args, NULL, NULL, sa, sb, 0); + } + + *Info = info; + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, .5 * args.n * args.n, + args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.)) + + args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.))); + + IDEBUG_END; + + return 0; +} diff --git a/interface/rot.c b/interface/rot.c new file mode 100644 index 000000000..2e458b12d --- /dev/null +++ b/interface/rot.c @@ -0,0 +1,82 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifndef CBLAS
+
+void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *C, FLOAT *S){
+
+  BLASLONG n = *N;
+  BLASLONG incx = *INCX;
+  BLASLONG incy = *INCY;
+  FLOAT c = *C;
+  FLOAT s = *S;
+
+  PRINT_DEBUG_NAME;
+
+#else
+
+void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT c, FLOAT s){
+
+  PRINT_DEBUG_CNAME;
+
+#endif
+
+  if (n <= 0) return; /* nothing to rotate */
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0) x -= (n - 1) * incx; /* negative stride: start from the far end of x */
+  if (incy < 0) y -= (n - 1) * incy; /* negative stride: start from the far end of y */
+
+  ROT_K(n, x, incx, y, incy, c, s);
+
+  FUNCTION_PROFILE_END(1, n, n);
+
+  IDEBUG_END;
+
+  return;
+
+}
diff --git a/interface/rotg.c b/interface/rotg.c
new file mode 100644
index 000000000..49088ab02
--- /dev/null
+++ b/interface/rotg.c
@@ -0,0 +1,109 @@
+#include <math.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifndef CBLAS
+
+void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
+
+#else
+
+void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
+
+#endif
+
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
+
+  long double da = *DA;
+  long double db = *DB;
+  long double c;
+  long double s;
+  long double r, roe, z;
+
+  long double ada = fabsl(da); /* long double variant: do not round the operand to double */
+  long double adb = fabsl(db);
+  long double scale = ada + adb;
+
+#ifndef CBLAS
+  PRINT_DEBUG_NAME;
+#else
+  PRINT_DEBUG_CNAME;
+#endif
+
+  roe = db;
+  if (ada > adb) roe = da; /* r takes the sign of the larger-magnitude input */
+
+  if (scale == ZERO) {
+    *C = ONE;
+    *S = ZERO;
+    *DA = ZERO;
+    *DB = ZERO;
+  } else {
+    r = sqrtl(da * da + db * db); /* sqrtl: the unscaled sum of squares may not fit in a double */
+    if (roe < 0) r = -r;
+    c = da / r;
+    s = db / r;
+    z = ONE;
+    if (da != ZERO) {
+      if (ada > adb){
+        z = s;
+      } else {
+        z = ONE / c;
+      }
+    }
+
+    *C = c;
+    *S = s;
+    *DA = r;
+    *DB = z;
+  }
+
+#else
+  FLOAT da = *DA;
+  FLOAT db = *DB;
+  FLOAT c; /* C and S are only written by this routine, so they are not read here */
+  FLOAT s;
+  FLOAT r, roe, z;
+
+  FLOAT ada = fabs(da);
+  FLOAT adb = fabs(db);
+  FLOAT scale = ada + adb;
+
+#ifndef CBLAS
+  PRINT_DEBUG_NAME;
+#else
+  PRINT_DEBUG_CNAME;
+#endif
+
+  roe = db;
+  if (ada > adb) roe = da;
+
+  if (scale == ZERO) {
+    *C = ONE;
+    *S = ZERO;
+    *DA = ZERO;
+    *DB = ZERO;
+  } else {
+    FLOAT aa = da / scale; /* scale first so the squares cannot overflow */
+    FLOAT bb = db / scale;
+
+    r = scale * sqrt(aa * aa + bb * bb);
+    if (roe < 0) r = -r;
+    c = da / r;
+    s = db / r;
+    z = ONE;
+    if (ada > adb) z = s;
+    if ((ada <= adb) && (c != ZERO)) z = ONE / c; /* <= so that |da| == |db| matches the branch above */
+
+    *C = c;
+    *S = s;
+    *DA = r;
+    *DB = z;
+  }
+#endif
+
+  return;
+}
diff --git a/interface/rotm.c b/interface/rotm.c
new file mode 100644
index 000000000..4f026c75d
--- /dev/null
+++ b/interface/rotm.c
@@ -0,0 +1,155 @@
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifndef CBLAS
+
+void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){
+
+  blasint n = *N;
+  blasint incx = *INCX;
+  blasint incy = *INCY;
+
+#else
+
+void CNAME(blasint n, FLOAT *dx, blasint incx, FLOAT *dy, blasint incy, FLOAT *dparam){
+
+#endif
+
+  blasint i__1, i__2;
+
+  blasint i__;
+  FLOAT w, z__;
+  blasint kx, ky;
+  FLOAT dh11, dh12, dh22, dh21, dflag;
+  blasint nsteps;
+
+#ifndef CBLAS
+  PRINT_DEBUG_NAME;
+#else
+  PRINT_DEBUG_CNAME;
+#endif
+
+  --dparam; /* from here on all three arrays are indexed from 1 */
+  --dy;
+  --dx;
+
+  dflag = dparam[1];
+  if (n <= 0 || dflag == - 2.0) goto L140; /* flag -2: H is the identity, nothing to do */
+
+  if (! (incx == incy && incx > 0)) goto L70; /* general strides are handled from L70 on */
+
+  nsteps = n * incx;
+  if (dflag < 0.) {
+    goto L50;
+  } else if (dflag == 0) {
+    goto L10;
+  } else {
+    goto L30;
+  }
+L10:
+  dh12 = dparam[4];
+  dh21 = dparam[3];
+  i__1 = nsteps;
+  i__2 = incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    w = dx[i__];
+    z__ = dy[i__];
+    dx[i__] = w + z__ * dh12;
+    dy[i__] = w * dh21 + z__;
+/* L20: */
+  }
+  goto L140;
+L30:
+  dh11 = dparam[2];
+  dh22 = dparam[5];
+  i__2 = nsteps;
+  i__1 = incx;
+  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+    w = dx[i__];
+    z__ = dy[i__];
+    dx[i__] = w * dh11 + z__;
+    dy[i__] = -w + dh22 * z__;
+/* L40: */
+  }
+  goto L140;
+L50:
+  dh11 = dparam[2];
+  dh12 = dparam[4];
+  dh21 = dparam[3];
+  dh22 = dparam[5];
+  i__1 = nsteps;
+  i__2 = incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    w = dx[i__];
+    z__ = dy[i__];
+    dx[i__] = w * dh11 + z__ * dh12;
+    dy[i__] = w * dh21 + z__ * dh22;
+/* L60: */
+  }
+  goto L140;
+L70:
+  kx = 1;
+  ky = 1;
+  if (incx < 0) {
+    kx = (1 - n) * incx + 1;
+  }
+  if (incy < 0) {
+    ky = (1 - n) * incy + 1;
+  }
+
+  if (dflag < 0.) {
+    goto L120;
+  } else if (dflag == 0) {
+    goto L80;
+  } else {
+    goto L100;
+  }
+L80:
+  dh12 = dparam[4];
+  dh21 = dparam[3];
+  i__2 = n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = dx[kx];
+    z__ = dy[ky];
+    dx[kx] = w + z__ * dh12;
+    dy[ky] = w * dh21 + z__;
+    kx += incx;
+    ky += incy;
+/* L90: */
+  }
+  goto L140;
+L100:
+  dh11 = dparam[2];
+  dh22 = dparam[5];
+  i__2 = n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = dx[kx];
+    z__ = dy[ky];
+    dx[kx] = w * dh11 + z__;
+    dy[ky] = -w + dh22 * z__;
+    kx += incx;
+    ky += incy;
+/* L110: */
+  }
+  goto L140;
+L120:
+  dh11 = dparam[2];
+  dh12 = dparam[4];
+  dh21 = dparam[3];
+  dh22 = dparam[5];
+  i__2 = n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = dx[kx];
+    z__ = dy[ky];
+    dx[kx] = w * dh11 + z__ * dh12;
+    dy[ky] = w * dh21 + z__ * dh22;
+    kx += incx;
+    ky += incy;
+/* L130: */
+  }
+L140:
+  return;
+}
+
diff --git a/interface/rotmg.c b/interface/rotmg.c
new file mode 100644
index 000000000..c37c09914
--- /dev/null
+++ b/interface/rotmg.c
@@ -0,0 +1,199 @@
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#define GAM 4096.e0
+#define GAMSQ 16777216.e0
+#define RGAMSQ 5.9604645e-8
+
+#ifndef CBLAS
+
+void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){
+
+  FLOAT dy1 = *DY1;
+
+#else
+
+void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
+
+#endif
+
+  FLOAT du, dp1, dp2, dq2, dq1, dh11, dh21, dh12, dh22;
+  int igo, flag;
+  FLOAT dtemp;
+
+#ifndef CBLAS
+  PRINT_DEBUG_NAME;
+#else
+  PRINT_DEBUG_CNAME;
+#endif
+
+  dh11 = ZERO;
+  dh12 = ZERO;
+  dh21 = ZERO;
+  dh22 = ZERO;
+
+  if (*dd1 < ZERO) goto L60;
+
+  dp2 = *dd2 * dy1;
+
+  if (dp2 == ZERO) {
+    flag = -2;
+    goto L260;
+  }
+
+  dp1 = *dd1 * *dx1;
+  dq2 = dp2 * dy1;
+  dq1 = dp1 * *dx1;
+
+  if (! ((dq1 < ZERO ? -dq1 : dq1) > (dq2 < ZERO ? -dq2 : dq2))) goto L40; /* floating-point |dq1| > |dq2|; integer abs() would truncate */
+
+  dh21 = -(dy1) / *dx1;
+  dh12 = dp2 / dp1;
+
+  du = ONE - dh12 * dh21;
+
+  if (du <= ZERO) goto L60;
+
+  flag = 0;
+  *dd1 /= du;
+  *dd2 /= du;
+  *dx1 *= du;
+
+  goto L100;
+
+L40:
+  if (dq2 < ZERO) goto L60;
+
+  flag = 1;
+  dh11 = dp1 / dp2;
+  dh22 = *dx1 / dy1;
+  du = ONE + dh11 * dh22;
+  dtemp = *dd2 / du;
+  *dd2 = *dd1 / du;
+  *dd1 = dtemp;
+  *dx1 = dy1 * du;
+  goto L100;
+
+L60:
+  flag = -1; /* degenerate input: zero H, d1, d2 and x1 */
+  dh11 = ZERO;
+  dh12 = ZERO;
+  dh21 = ZERO;
+  dh22 = ZERO;
+
+  *dd1 = ZERO;
+  *dd2 = ZERO;
+  *dx1 = ZERO;
+  goto L220;
+
+
+L70:
+  if (flag < 0) goto L90; /* L70..L90: switch H to the full (flag -1) form before rescaling */
+
+  if (flag > 0) goto L80;
+
+  dh11 = ONE;
+  dh22 = ONE;
+  flag = -1;
+  goto L90;
+
+L80:
+  dh21 = -ONE;
+  dh12 = ONE;
+  flag = -1;
+
+L90:
+  switch (igo) { /* return to whichever rescaling step jumped to L70 */
+  case 0: goto L120;
+  case 1: goto L150;
+  case 2: goto L180;
+  case 3: goto L210;
+  }
+
+L100:
+  if (!(*dd1 <= RGAMSQ)) goto L130;
+  if (*dd1 == ZERO) goto L160;
+  igo = 0;
+  goto L70;
+
+L120:
+  *dd1 *= GAM * GAM;
+  *dx1 /= GAM;
+  dh11 /= GAM;
+  dh12 /= GAM;
+  goto L100;
+
+L130:
+  if (! (*dd1 >= GAMSQ)) {
+    goto L160;
+  }
+  igo = 1;
+  goto L70;
+
+L150:
+  *dd1 /= GAM * GAM;
+  *dx1 *= GAM;
+  dh11 *= GAM;
+  dh12 *= GAM;
+  goto L130;
+
+L160:
+  if (! ((*dd2 < ZERO ? -*dd2 : *dd2) <= RGAMSQ)) { /* floating-point |d2|, not integer abs() */
+    goto L190;
+  }
+  if (*dd2 == ZERO) {
+    goto L220;
+  }
+  igo = 2;
+  goto L70;
+
+L180:
+/* Computing 2nd power */
+  *dd2 *= GAM * GAM;
+  dh21 /= GAM;
+  dh22 /= GAM;
+  goto L160;
+
+L190:
+  if (! ((*dd2 < ZERO ? -*dd2 : *dd2) >= GAMSQ)) { /* floating-point |d2|, not integer abs() */
+    goto L220;
+  }
+  igo = 3;
+  goto L70;
+
+L210:
+/* Computing 2nd power */
+  *dd2 /= GAM * GAM;
+  dh21 *= GAM;
+  dh22 *= GAM;
+  goto L190;
+
+L220:
+  if (flag < 0) {
+    goto L250;
+  } else if (flag == 0) {
+    goto L230;
+  } else {
+    goto L240;
+  }
+L230:
+  dparam[2] = dh21;
+  dparam[3] = dh12;
+  goto L260;
+L240:
+  dparam[1] = dh11; /* h11 slot, the same one the L250 path uses */
+  dparam[4] = dh22;
+  goto L260;
+L250:
+  dparam[1] = dh11;
+  dparam[2] = dh21;
+  dparam[3] = dh12;
+  dparam[4] = dh22;
+L260:
+  dparam[0] = (FLOAT) flag;
+  return;
+}
+
+
diff --git a/interface/sbmv.c b/interface/sbmv.c
new file mode 100644
index 000000000..2ffe7f166
--- /dev/null
+++ b/interface/sbmv.c
@@ -0,0 +1,215 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QSBMV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DSBMV "
+#else
+#define ERROR_NAME "SSBMV "
+#endif
+
+static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { /* indexed by uplo */
+#ifdef XDOUBLE
+  qsbmv_U, qsbmv_L,
+#elif defined(DOUBLE)
+  dsbmv_U, dsbmv_L,
+#else
+  ssbmv_U, ssbmv_L,
+#endif
+};
+
+#ifdef SMP
+static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  qsbmv_thread_U, qsbmv_thread_L,
+#elif defined(DOUBLE)
+  dsbmv_thread_U, dsbmv_thread_L,
+#else
+  ssbmv_thread_U, ssbmv_thread_L,
+#endif
+};
+#endif
+
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
+          FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  blasint k = *K;
+  FLOAT alpha = *ALPHA;
+  blasint lda = *LDA;
+  blasint incx = *INCX;
+  FLOAT beta = *BETA;
+  blasint incy = *INCY;
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  if (incy == 0) info = 11; /* checked last-to-first, so the lowest bad argument number wins */
+  if (incx == 0) info = 8;
+  if (lda < k + 1) info = 6;
+  if (k < 0) info = 3;
+  if (n < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_UPLO Uplo,
+           blasint n, blasint k,
+           FLOAT alpha,
+           FLOAT *a, blasint lda,
+           FLOAT *x, blasint incx,
+           FLOAT beta,
+           FLOAT *y, blasint incy){
+
+  FLOAT *buffer;
+  int uplo;
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  uplo = -1;
+  info = 0; /* stays 0 (reported as an error below) when order is neither value */
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (incy == 0) info = 11;
+    if (incx == 0) info = 8;
+    if (lda < k + 1) info = 6;
+    if (k < 0) info = 3;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1; /* row-major input swaps the roles of upper and lower */
+    if (Uplo == CblasLower) uplo = 0;
+
+    info = -1;
+
+    if (incy == 0) info = 11;
+    if (incx == 0) info = 8;
+    if (lda < k + 1) info = 6;
+    if (k < 0) info = 3;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  if (n == 0) return;
+
+  if (beta != ONE) SCAL_K(n, 0, 0, beta, y, abs(incy), NULL, 0, NULL, 0); /* y := beta * y */
+
+  if (alpha == ZERO) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0 ) x -= (n - 1) * incx; /* negative stride: start from the far end */
+  if (incy < 0 ) y -= (n - 1) * incy;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+    (sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
+
+#ifdef SMP
+  } else {
+
+    (sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * k / 2 + n, n * k);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/scal.c b/interface/scal.c
new file mode 100644
index 000000000..7b72ca01c
--- /dev/null
+++ b/interface/scal.c
@@ -0,0 +1,112 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE.
*/
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifndef CBLAS
+
+void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX){
+
+  blasint n = *N;
+  blasint incx = *INCX;
+  FLOAT alpha = *ALPHA;
+
+#else
+
+void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
+
+#endif
+
+#ifdef SMP
+  int mode, nthreads;
+#endif
+
+#ifndef CBLAS
+  PRINT_DEBUG_NAME;
+#else
+  PRINT_DEBUG_CNAME;
+#endif
+
+  if (incx <= 0 || n <= 0) return; /* non-positive stride or length: no-op */
+
+  if (alpha == ONE) return; /* scaling by one changes nothing */
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+
+#ifdef SMP
+  nthreads = num_cpu_avail(1);
+
+  if (nthreads == 1) {
+#endif
+
+    SCAL_K(n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0);
+
+#ifdef SMP
+  } else {
+
+#ifdef DOUBLE /* NOTE(review): no XDOUBLE case here, unlike swap.c in this patch -- confirm */
+    mode = BLAS_DOUBLE | BLAS_REAL;
+#else
+    mode = BLAS_SINGLE | BLAS_REAL;
+#endif
+
+    blas_level1_thread(mode, n, 0, 0,
+#ifndef CBLAS
+                       ALPHA,
+#else
+                       &alpha,
+#endif
+                       x, incx, NULL, 0, NULL, 0, (void *)SCAL_K, nthreads);
+
+  }
+#endif
+
+  FUNCTION_PROFILE_END(1, n, n);
+
+  IDEBUG_END;
+
+  return;
+
+}
diff --git a/interface/sdsdot.c b/interface/sdsdot.c
new file mode 100644
index 000000000..8540be660
--- /dev/null
+++ b/interface/sdsdot.c
@@ -0,0 +1,101 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1.
Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifndef CBLAS
+
+FLOATRET NAME(blasint *N, FLOAT *a, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
+
+  BLASLONG n = *N;
+  BLASLONG incx = *INCX;
+  BLASLONG incy = *INCY;
+  FLOATRET ret;
+
+  PRINT_DEBUG_NAME;
+
+  if (n <= 0) return (FLOATRET)(*a); /* empty dot product: the result is just the additive scalar */
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0) x -= (n - 1) * incx; /* negative stride: start from the far end */
+  if (incy < 0) y -= (n - 1) * incy;
+
+  ret = (FLOATRET)(SDSDOT_K(n, x, incx, y, incy) + *a);
+
+  FUNCTION_PROFILE_END(1, 2 * n, 2 * n);
+
+  IDEBUG_END;
+
+  return ret;
+
+}
+
+#else
+
+FLOAT CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
+
+  FLOAT ret;
+
+  PRINT_DEBUG_CNAME;
+
+  if (n <= 0) return alpha; /* empty dot product: the result is just the additive scalar */
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0) x -= (n - 1) * incx;
+  if (incy < 0) y -= (n - 1) * incy;
+
+  ret = SDSDOT_K(n, x, incx, y, incy) + alpha;
+
+  FUNCTION_PROFILE_END(1, 2 * n, 2 * n);
+
+  IDEBUG_END;
+
+  return ret;
+}
+
+#endif
diff --git a/interface/spmv.c b/interface/spmv.c
new file mode 100644
index 000000000..8d8902763
--- /dev/null
+++ b/interface/spmv.c
@@ -0,0 +1,207 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution.
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QSPMV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DSPMV "
+#else
+#define ERROR_NAME "SSPMV "
+#endif
+
+static int (*spmv[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { /* indexed by uplo */
+#ifdef XDOUBLE
+  qspmv_U, qspmv_L,
+#elif defined(DOUBLE)
+  dspmv_U, dspmv_L,
+#else
+  sspmv_U, sspmv_L,
+#endif
+};
+
+#ifdef SMP
+static int (*spmv_thread[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  qspmv_thread_U, qspmv_thread_L,
+#elif defined(DOUBLE)
+  dspmv_thread_U, dspmv_thread_L,
+#else
+  sspmv_thread_U, sspmv_thread_L,
+#endif
+};
+#endif
+
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
+          FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  FLOAT alpha = *ALPHA;
+  blasint incx = *INCX;
+  FLOAT beta = *BETA;
+  blasint incy = *INCY;
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  if (incy == 0) info = 9; /* checked last-to-first, so the lowest bad argument number wins */
+  if (incx == 0) info = 6;
+  if (n < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_UPLO Uplo,
+           blasint n,
+           FLOAT alpha,
+           FLOAT *a,
+           FLOAT *x, blasint incx,
+           FLOAT beta,
+           FLOAT *y, blasint incy){
+
+  FLOAT *buffer;
+  int uplo;
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  uplo = -1;
+  info = 0; /* stays 0 (reported as an error below) when order is neither value */
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (incy == 0) info = 9;
+    if (incx == 0) info = 6;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1; /* row-major input swaps the roles of upper and lower */
+    if (Uplo == CblasLower) uplo = 0;
+
+    info = -1;
+
+    if (incy == 0) info = 9;
+    if (incx == 0) info = 6;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  if (n == 0) return;
+
+  if (beta != ONE) SCAL_K(n, 0, 0, beta, y, abs(incy), NULL, 0, NULL, 0); /* y := beta * y */
+
+  if (alpha == ZERO) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0 ) x -= (n - 1) * incx; /* negative stride: start from the far end */
+  if (incy < 0 ) y -= (n - 1) * incy;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+    (spmv[uplo])(n, alpha, a, x, incx, y, incy, buffer);
+
+#ifdef SMP
+  } else {
+
+    (spmv_thread[uplo])(n, alpha, a, x, incx, y, incy, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/spr.c b/interface/spr.c
new file mode 100644
index 000000000..aa2ff8f3f
--- /dev/null
+++ b/interface/spr.c
@@ -0,0 +1,197 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution.
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QSPR "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DSPR "
+#else
+#define ERROR_NAME "SSPR "
+#endif
+
+static int (*spr[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, FLOAT *) = { /* indexed by uplo */
+#ifdef XDOUBLE
+  qspr_U, qspr_L,
+#elif defined(DOUBLE)
+  dspr_U, dspr_L,
+#else
+  sspr_U, sspr_L,
+#endif
+};
+
+#ifdef SMP
+static int (*spr_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, FLOAT *, int) = {
+#ifdef XDOUBLE
+  qspr_thread_U, qspr_thread_L,
+#elif defined(DOUBLE)
+  dspr_thread_U, dspr_thread_L,
+#else
+  sspr_thread_U, sspr_thread_L,
+#endif
+};
+#endif
+
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
+          FLOAT *x, blasint *INCX, FLOAT *a){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  FLOAT alpha = *ALPHA;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  if (incx == 0) info = 5; /* checked last-to-first, so the lowest bad argument number wins */
+  if (n < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_UPLO Uplo,
+           blasint n,
+           FLOAT alpha,
+           FLOAT *x, blasint incx,
+           FLOAT *a) {
+
+  FLOAT *buffer;
+  int uplo;
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  uplo = -1;
+  info = 0; /* stays 0 (reported as an error below) when order is neither value */
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 5;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1; /* row-major input swaps the roles of upper and lower */
+    if (Uplo == CblasLower) uplo = 0;
+
+    info = -1;
+
+    if (incx == 0) info = 5;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  if (n == 0) return;
+
+  if (alpha == ZERO) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0 ) x -= (n - 1) * incx; /* negative stride: start from the far end */
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+    (spr[uplo])(n, alpha, x, incx, a, buffer);
+
+#ifdef SMP
+  } else {
+
+    (spr_thread[uplo])(n, alpha, x, incx, a, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/spr2.c b/interface/spr2.c
new file mode 100644
index 000000000..e556d3fa8
--- /dev/null
+++ b/interface/spr2.c
@@ -0,0 +1,203 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QSPR2 "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DSPR2 "
+#else
+#define ERROR_NAME "SSPR2 "
+#endif
+
+static int (*spr2[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *) = { /* indexed by uplo */
+#ifdef XDOUBLE
+  qspr2_U, qspr2_L,
+#elif defined(DOUBLE)
+  dspr2_U, dspr2_L,
+#else
+  sspr2_U, sspr2_L,
+#endif
+};
+
+#ifdef SMP
+static int (*spr2_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *, int) = {
+#ifdef XDOUBLE
+  qspr2_thread_U, qspr2_thread_L,
+#elif defined(DOUBLE)
+  dspr2_thread_U, dspr2_thread_L,
+#else
+  sspr2_thread_U, sspr2_thread_L,
+#endif
+};
+#endif
+
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
+          FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  FLOAT alpha = *ALPHA;
+  blasint incx = *INCX;
+  blasint incy = *INCY;
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  if (incy == 0) info = 7; /* checked last-to-first, so the lowest bad argument number wins */
+  if (incx == 0) info = 5;
+  if (n < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_UPLO Uplo,
+           blasint n,
+           FLOAT alpha,
+           FLOAT *x, blasint incx,
+           FLOAT *y, blasint incy,
+           FLOAT *a) {
+
+  FLOAT *buffer;
+  int uplo;
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  uplo = -1;
+  info = 0; /* stays 0 (reported as an error below) when order is neither value */
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (incy == 0) info = 7;
+    if (incx == 0) info = 5;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1; /* row-major input swaps the roles of upper and lower */
+    if (Uplo == CblasLower) uplo = 0;
+
+    info = -1;
+
+    if (incy == 0) info = 7;
+    if (incx == 0) info = 5;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  if (n == 0) return;
+
+  if (alpha == ZERO) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0 ) x -= (n - 1) * incx; /* negative stride: start from the far end */
+  if (incy < 0 ) y -= (n - 1) * incy;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+    (spr2[uplo])(n, alpha, x, incx, y, incy, a, buffer);
+
+#ifdef SMP
+  } else {
+
+    (spr2_thread[uplo])(n, alpha, x, incx, y, incy, a, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * n / 2 + 2 * n, 2 * n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/swap.c b/interface/swap.c
new file mode 100644
index 000000000..7676246f9
--- /dev/null
+++ b/interface/swap.c
@@ -0,0 +1,110 @@
+/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ + + blasint n = *N; + blasint incx = *INCX; + blasint incy = *INCY; + +#else + +void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ + +#endif + +#ifdef SMP + int mode, nthreads; + FLOAT dummyalpha[2] = {ZERO, ZERO}; +#endif + +#ifndef CBLAS + PRINT_DEBUG_NAME; +#else + PRINT_DEBUG_CNAME; +#endif + + if (n <= 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx; + if (incy < 0) y -= (n - 1) * incy; + +#ifdef SMP + nthreads = num_cpu_avail(1); + + if (nthreads == 1) { +#endif + + SWAP_K(n, 0, 0, ZERO, x, incx, y, incy, NULL, 0); + +#ifdef SMP + } else { + +#ifdef XDOUBLE + mode = BLAS_XDOUBLE | BLAS_REAL; +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | BLAS_REAL; +#else + mode = BLAS_SINGLE | BLAS_REAL; +#endif + + blas_level1_thread(mode, n, 0, 0, dummyalpha, + x, incx, y, incy, NULL, 0, (void *)SWAP_K, nthreads); + } + +#endif + + FUNCTION_PROFILE_END(1, 2 * n, 0); + + IDEBUG_END; + + return; + +} diff --git a/interface/symm.c b/interface/symm.c new file mode 100644 index 000000000..a0d52c49d --- /dev/null +++ b/interface/symm.c @@ -0,0 +1,422 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef COMPLEX +#ifdef XDOUBLE +#define ERROR_NAME "QSYMM " +#elif defined(DOUBLE) +#define ERROR_NAME "DSYMM " +#else +#define ERROR_NAME "SSYMM " +#endif +#else +#ifndef GEMM3M +#ifndef HEMM +#ifdef XDOUBLE +#define ERROR_NAME "XSYMM " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSYMM " +#else +#define ERROR_NAME "CSYMM " +#endif +#else +#ifdef XDOUBLE +#define ERROR_NAME "XHEMM " +#elif defined(DOUBLE) +#define ERROR_NAME "ZHEMM " +#else +#define ERROR_NAME "CHEMM " +#endif +#endif +#else +#ifndef HEMM +#ifdef XDOUBLE +#define ERROR_NAME "XSYMM3M " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSYMM3M " +#else +#define ERROR_NAME "CSYMM3M " +#endif +#else +#ifdef XDOUBLE +#define ERROR_NAME "XHEMM3M " +#elif defined(DOUBLE) +#define ERROR_NAME "ZHEMM3M " +#else +#define ERROR_NAME "CHEMM3M " +#endif +#endif +#endif +#endif + +static int (*symm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { +#ifndef GEMM3M +#ifndef HEMM + SYMM_LU, SYMM_LL, SYMM_RU, SYMM_RL, +#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3) + SYMM_THREAD_LU, SYMM_THREAD_LL, SYMM_THREAD_RU, SYMM_THREAD_RL, +#endif +#else + HEMM_LU, HEMM_LL, HEMM_RU, HEMM_RL, +#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3) + HEMM_THREAD_LU, HEMM_THREAD_LL, HEMM_THREAD_RU, HEMM_THREAD_RL, +#endif +#endif +#else +#ifndef HEMM + SYMM3M_LU, SYMM3M_LL, SYMM3M_RU, SYMM3M_RL, +#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3) + SYMM3M_THREAD_LU, SYMM3M_THREAD_LL, SYMM3M_THREAD_RU, SYMM3M_THREAD_RL, +#endif +#else + HEMM3M_LU, HEMM3M_LL, HEMM3M_RU, HEMM3M_RL, +#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3) + HEMM3M_THREAD_LU, HEMM3M_THREAD_LL, HEMM3M_THREAD_RU, HEMM3M_THREAD_RL, +#endif +#endif +#endif +}; + +#ifndef CBLAS + +void NAME(char *SIDE, char *UPLO, + blasint *M, blasint *N, + 
FLOAT *alpha, FLOAT *a, blasint *ldA, + FLOAT *b, blasint *ldB, + FLOAT *beta, FLOAT *c, blasint *ldC){ + + char side_arg = *SIDE; + char uplo_arg = *UPLO; + + blas_arg_t args; + + FLOAT *buffer; + FLOAT *sa, *sb; + +#ifdef SMP +#ifdef XDOUBLE + int mode = BLAS_XDOUBLE | BLAS_REAL; +#elif defined(DOUBLE) + int mode = BLAS_DOUBLE | BLAS_REAL; +#else + int mode = BLAS_SINGLE | BLAS_REAL; +#endif +#endif + +#if defined(SMP) && !defined(NO_AFFINITY) + int nodes; +#endif + + blasint info; + int side; + int uplo; + + PRINT_DEBUG_NAME; + + args.alpha = (void *)alpha; + args.beta = (void *)beta; + + TOUPPER(side_arg); + TOUPPER(uplo_arg); + + side = -1; + uplo = -1; + + if (side_arg == 'L') side = 0; + if (side_arg == 'R') side = 1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + args.m = *M; + args.n = *N; + + args.c = (void *)c; + args.ldc = *ldC; + + info = 0; + + if (args.ldc < MAX(1, args.m)) info = 12; + + if (!side) { + args.a = (void *)a; + args.b = (void *)b; + + args.lda = *ldA; + args.ldb = *ldB; + + if (args.ldb < MAX(1, args.m)) info = 9; + if (args.lda < MAX(1, args.m)) info = 7; + + } else { + args.a = (void *)b; + args.b = (void *)a; + + args.lda = *ldB; + args.ldb = *ldA; + + if (args.lda < MAX(1, args.m)) info = 9; + if (args.ldb < MAX(1, args.n)) info = 7; + } + + if (args.n < 0) info = 4; + if (args.m < 0) info = 3; + if (uplo < 0) info = 2; + if (side < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, + blasint m, blasint n, +#ifndef COMPLEX + FLOAT alpha, +#else + FLOAT *alpha, +#endif + FLOAT *a, blasint lda, + FLOAT *b, blasint ldb, +#ifndef COMPLEX + FLOAT beta, +#else + FLOAT *beta, +#endif + FLOAT *c, blasint ldc) { + + blas_arg_t args; + int side, uplo; + blasint info; + + FLOAT *buffer; + FLOAT *sa, *sb; + +#ifdef SMP +#ifdef XDOUBLE + int mode = BLAS_XDOUBLE | BLAS_REAL; 
+#elif defined(DOUBLE) + int mode = BLAS_DOUBLE | BLAS_REAL; +#else + int mode = BLAS_SINGLE | BLAS_REAL; +#endif +#endif + +#if defined(SMP) && !defined(NO_AFFINITY) + int nodes; +#endif + + PRINT_DEBUG_CNAME; + +#ifndef COMPLEX + args.alpha = (void *)α + args.beta = (void *)β +#else + args.alpha = (void *)alpha; + args.beta = (void *)beta; +#endif + + args.c = (void *)c; + args.ldc = ldc; + + side = -1; + uplo = -1; + info = 0; + + if (order == CblasColMajor) { + if (Side == CblasLeft) side = 0; + if (Side == CblasRight) side = 1; + + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + info = -1; + + args.m = m; + args.n = n; + + if (args.ldc < MAX(1, args.m)) info = 12; + + if (!side) { + args.a = (void *)a; + args.b = (void *)b; + + args.lda = lda; + args.ldb = ldb; + + if (args.ldb < MAX(1, args.m)) info = 9; + if (args.lda < MAX(1, args.m)) info = 7; + + } else { + args.a = (void *)b; + args.b = (void *)a; + + args.lda = ldb; + args.ldb = lda; + + if (args.lda < MAX(1, args.m)) info = 9; + if (args.ldb < MAX(1, args.n)) info = 7; + } + + if (args.n < 0) info = 4; + if (args.m < 0) info = 3; + if (uplo < 0) info = 2; + if (side < 0) info = 1; + } + + if (order == CblasRowMajor) { + if (Side == CblasLeft) side = 1; + if (Side == CblasRight) side = 0; + + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + info = -1; + + args.m = n; + args.n = m; + + if (args.ldc < MAX(1, args.m)) info = 12; + + if (!side) { + args.a = (void *)a; + args.b = (void *)b; + + args.lda = lda; + args.ldb = ldb; + + if (args.ldb < MAX(1, args.m)) info = 9; + if (args.lda < MAX(1, args.m)) info = 7; + + } else { + args.a = (void *)b; + args.b = (void *)a; + + args.lda = ldb; + args.ldb = lda; + + if (args.lda < MAX(1, args.m)) info = 9; + if (args.ldb < MAX(1, args.n)) info = 7; + } + + if (args.n < 0) info = 4; + if (args.m < 0) info = 3; + if (uplo < 0) info = 2; + if (side < 0) info = 1; + } + + if (info >= 0) { + 
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (args.m == 0 || args.n == 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + buffer = (FLOAT *)blas_memory_alloc(0); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(3); + + if (args.nthreads == 1) { +#endif + + (symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + + } else { + +#ifndef NO_AFFINITY + nodes = get_num_nodes(); + + if (nodes > 1) { + + args.nthreads /= nodes; + + gemm_thread_mn(mode, &args, NULL, NULL, + symm[4 | (side << 1) | uplo ], sa, sb, nodes); + + } else { +#endif + +#ifndef USE_SIMPLE_THREADED_LEVEL3 + + (symm[4 | (side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0); + +#else + + GEMM_THREAD(mode, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads); + +#endif + +#ifndef NO_AFFINITY + } +#endif + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, + (!side)? args.m * (args.m / 2 + args.n) : args.n * (args.m + args.n / 2), + (!side)? 2 * args.m * args.m * args.n : 2 * args.m * args.n * args.n); + + IDEBUG_END; + + return; +} diff --git a/interface/symv.c b/interface/symv.c new file mode 100644 index 000000000..e8c24df66 --- /dev/null +++ b/interface/symv.c @@ -0,0 +1,205 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QSYMV " +#elif defined(DOUBLE) +#define ERROR_NAME "DSYMV " +#else +#define ERROR_NAME "SSYMV " +#endif + +#ifndef CBLAS + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY){ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha = *ALPHA; + blasint lda = *LDA; + blasint incx = *INCX; + FLOAT beta = *BETA; + blasint incy = *INCY; + + int (*symv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { + SYMV_U, SYMV_L, + }; + +#ifdef SMP + int (*symv_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { + SYMV_THREAD_U, SYMV_THREAD_L, + }; +#endif + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (incy == 0) info = 10; + if (incx == 0) info = 7; + if (lda < MAX(1, n)) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, + FLOAT *a, blasint lda, FLOAT *x, blasint incx, FLOAT beta, FLOAT *y, blasint incy) { + + FLOAT *buffer; + int uplo; + blasint info; +#ifdef SMP + int nthreads; +#endif + + int (*symv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { + SYMV_U, SYMV_L, + }; + +#ifdef SMP + int (*symv_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { + SYMV_THREAD_U, SYMV_THREAD_L, + }; +#endif + + 
PRINT_DEBUG_CNAME; + + uplo = -1; + info = 0; + + if (order == CblasColMajor) { + + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + info = -1; + + if (incy == 0) info = 10; + if (incx == 0) info = 7; + if (lda < MAX(1, n)) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (order == CblasRowMajor) { + + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + info = -1; + + if (incy == 0) info = 10; + if (incx == 0) info = 7; + if (lda < MAX(1, n)) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + if (beta != ONE) SCAL_K(n, 0, 0, beta, y, abs(incy), NULL, 0, NULL, 0); + + if (alpha == ZERO) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx; + if (incy < 0 ) y -= (n - 1) * incy; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (symv[uplo])(n, n, alpha, a, lda, x, incx, y, incy, buffer); + +#ifdef SMP + } else { + + (symv_thread[uplo])(n, alpha, a, lda, x, incx, y, incy, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(1, n * n / 2 + 2 * n, 2 * n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/syr.c b/interface/syr.c new file mode 100644 index 000000000..2b2d3d1e2 --- /dev/null +++ b/interface/syr.c @@ -0,0 +1,200 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. 
Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QSYR " +#elif defined(DOUBLE) +#define ERROR_NAME "DSYR " +#else +#define ERROR_NAME "SSYR " +#endif + +static int (*syr[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { +#ifdef XDOUBLE + qsyr_U, qsyr_L, +#elif defined(DOUBLE) + dsyr_U, dsyr_L, +#else + ssyr_U, ssyr_L, +#endif +}; + +#ifdef SMP +static int (*syr_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + qsyr_thread_U, qsyr_thread_L, +#elif defined(DOUBLE) + dsyr_thread_U, dsyr_thread_L, +#else + ssyr_thread_U, ssyr_thread_L, +#endif +}; +#endif + +#ifndef CBLAS + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, + FLOAT *x, blasint *INCX, FLOAT *a, blasint *LDA){ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha = *ALPHA; + blasint lda = *LDA; + blasint incx = *INCX; + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (lda < MAX(1, n)) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) { + + FLOAT *buffer; + int trans, uplo; + blasint info; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_CNAME; + + trans = -1; + uplo = -1; + info = 0; + + if (order == CblasColMajor) { + + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + info = -1; + + if (lda < MAX(1, n)) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + } + + 
if (order == CblasRowMajor) { + + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + info = -1; + + if (lda < MAX(1, n)) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + if (alpha == ZERO) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (syr[uplo])(n, alpha, x, incx, a, lda, buffer); + +#ifdef SMP + } else { + + (syr_thread[uplo])(n, alpha, x, incx, a, lda, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/syr2.c b/interface/syr2.c new file mode 100644 index 000000000..15dbae4bd --- /dev/null +++ b/interface/syr2.c @@ -0,0 +1,204 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "QSYR2 " +#elif defined(DOUBLE) +#define ERROR_NAME "DSYR2 " +#else +#define ERROR_NAME "SSYR2 " +#endif + +static int (*syr2[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { +#ifdef XDOUBLE + qsyr2_U, qsyr2_L, +#elif defined(DOUBLE) + dsyr2_U, dsyr2_L, +#else + ssyr2_U, ssyr2_L, +#endif +}; + +#ifdef SMP +static int (*syr2_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + qsyr2_thread_U, qsyr2_thread_L, +#elif defined(DOUBLE) + dsyr2_thread_U, dsyr2_thread_L, +#else + ssyr2_thread_U, ssyr2_thread_L, +#endif +}; +#endif + +#ifndef CBLAS + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, + FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a, blasint *LDA){ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha = *ALPHA; + blasint lda = *LDA; + blasint incx = *INCX; + blasint incy = *INCY; + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (lda < MAX(1, n)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT *a, blasint lda) { + + FLOAT *buffer; + int trans, uplo; + blasint info; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_CNAME; + + trans = -1; + uplo = -1; + info = 0; + + if (order == CblasColMajor) { + + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == 
CblasLower) uplo = 1; + + info = -1; + + if (lda < MAX(1, n)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (order == CblasRowMajor) { + + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + info = -1; + + if (lda < MAX(1, n)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + if (alpha == ZERO) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx; + if (incy < 0 ) y -= (n - 1) * incy; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (syr2[uplo])(n, alpha, x, incx, y, incy, a, lda, buffer); + +#ifdef SMP + } else { + + (syr2_thread[uplo])(n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(1, n * n / 2 + 2 * n, 2 * n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/syr2k.c b/interface/syr2k.c new file mode 100644 index 000000000..70b840955 --- /dev/null +++ b/interface/syr2k.c @@ -0,0 +1,366 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifndef COMPLEX
+#ifdef XDOUBLE
+#define ERROR_NAME "QSYR2K"
+#elif defined(DOUBLE)
+#define ERROR_NAME "DSYR2K"
+#else
+#define ERROR_NAME "SSYR2K"
+#endif
+#else
+#ifndef HEMM
+#ifdef XDOUBLE
+#define ERROR_NAME "XSYR2K"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZSYR2K"
+#else
+#define ERROR_NAME "CSYR2K"
+#endif
+#else
+#ifdef XDOUBLE
+#define ERROR_NAME "XHER2K"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHER2K"
+#else
+#define ERROR_NAME "CHER2K"
+#endif
+#endif
+#endif
+/* Kernel table; the shared tail below selects an entry with (uplo << 1) | trans. */
+static int (*syr2k[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+#ifndef HEMM
+  SYR2K_UN, SYR2K_UC, SYR2K_LN, SYR2K_LC,
+#else
+  HER2K_UN, HER2K_UC, HER2K_LN, HER2K_LC,
+#endif
+};
+
+#ifndef CBLAS
+/* Fortran-style entry point: all arguments are passed by reference. */
+void NAME(char *UPLO, char *TRANS,
+          blasint *N, blasint *K,
+          FLOAT *alpha, FLOAT *a, blasint *ldA,
+          FLOAT *b, blasint *ldB,
+          FLOAT *beta, FLOAT *c, blasint *ldC){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+
+  blas_arg_t args;
+
+  FLOAT *buffer;
+  FLOAT *sa, *sb;
+
+#ifdef SMP
+#ifndef COMPLEX
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_REAL;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_REAL;
+#else
+  int mode = BLAS_SINGLE | BLAS_REAL;
+#endif
+#else
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_COMPLEX;
+#else
+  int mode = BLAS_SINGLE | BLAS_COMPLEX;
+#endif
+#endif
+#endif
+
+  blasint info;
+  int uplo;
+  int trans;
+  int nrowa;
+
+  PRINT_DEBUG_NAME;
+
+  args.n = *N;
+  args.k = *K;
+
+  args.a = (void *)a;
+  args.b = (void *)b;
+  args.c = (void *)c;
+
+  args.lda = *ldA;
+  args.ldb = *ldB;
+  args.ldc = *ldC;
+
+  args.alpha = (void *)alpha;
+  args.beta = (void *)beta;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+
+  uplo = -1;
+  trans = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;
+  if (trans_arg == 'C') trans = 1;
+
+  nrowa = args.n;
+  if (trans & 1) nrowa = args.k;
+
+  info = 0;
+  /* Later checks overwrite earlier ones, so info ends as the lowest-numbered bad argument. */
+  if (args.ldc < MAX(1,args.n)) info = 12;
+  if (args.ldb < MAX(1,nrowa)) info = 9;
+  if (args.lda < MAX(1,nrowa)) info = 7;
+  if (args.k < 0) info = 4;
+  if (args.n < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+/* CBLAS entry point: CblasRowMajor is served by the same kernels with uplo and trans swapped. */
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
+           blasint n, blasint k,
+#ifndef COMPLEX
+           FLOAT alpha,
+#else
+           FLOAT *alpha,
+#endif
+           FLOAT *a, blasint lda,
+           FLOAT *b, blasint ldb,
+#if !defined(COMPLEX) || defined(HEMM)
+           FLOAT beta,
+#else
+           FLOAT *beta,
+#endif
+           FLOAT *c, blasint ldc) {
+
+  blas_arg_t args;
+  int uplo, trans;
+  blasint info, nrowa;
+
+  FLOAT *buffer;
+  FLOAT *sa, *sb;
+
+#ifdef HEMM
+  FLOAT CAlpha[2];
+#endif
+
+#ifdef SMP
+#ifndef COMPLEX
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_REAL;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_REAL;
+#else
+  int mode = BLAS_SINGLE | BLAS_REAL;
+#endif
+#else
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_COMPLEX;
+#else
+  int mode = BLAS_SINGLE | BLAS_COMPLEX;
+#endif
+#endif
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  args.n = n;
+  args.k = k;
+
+  args.a = (void *)a;
+  args.b = (void *)b;
+  args.c = (void *)c;
+
+  args.lda = lda;
+  args.ldb = ldb;
+  args.ldc = ldc;
+
+#ifndef COMPLEX
+  args.alpha = (void *)&alpha;
+#else
+  args.alpha = (void *)alpha;
+#endif
+
+#if !defined(COMPLEX) || defined(HEMM)
+  args.beta = (void *)&beta;
+#else
+  args.beta = (void *)beta;
+#endif
+
+  trans = -1;
+  uplo = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (Trans == CblasNoTrans) trans = 0;
+#ifndef COMPLEX
+    if (Trans == CblasTrans) trans = 1;
+    if (Trans == CblasConjNoTrans) trans = 0;
+    if (Trans == CblasConjTrans) trans = 1;
+#elif !defined(HEMM)
+    if (Trans == CblasTrans) trans = 1;
+#else
+    if (Trans == CblasConjTrans) trans = 1;
+#endif
+
+    info = -1;
+
+    nrowa = args.n;
+    if (trans & 1) nrowa = args.k;
+
+    if (args.ldc < MAX(1,args.n)) info = 12;
+    if (args.ldb < MAX(1,nrowa)) info = 9;
+    if (args.lda < MAX(1,nrowa)) info = 7;
+    if (args.k < 0) info = 4;
+    if (args.n < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+
+#ifdef HEMM /* row-major HER2K passes the conjugate of alpha to the kernel */
+    CAlpha[0] = alpha[0];
+    CAlpha[1] = -alpha[1];
+
+    args.alpha = (void *)CAlpha;
+#endif
+
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (Trans == CblasNoTrans) trans = 1;
+#ifndef COMPLEX
+    if (Trans == CblasTrans) trans = 0;
+    if (Trans == CblasConjNoTrans) trans = 1;
+    if (Trans == CblasConjTrans) trans = 0;
+#elif !defined(HEMM)
+    if (Trans == CblasTrans) trans = 0;
+#else
+    if (Trans == CblasConjTrans) trans = 0;
+#endif
+
+    info = -1;
+
+    nrowa = args.n;
+    if (trans & 1) nrowa = args.k;
+
+    if (args.ldc < MAX(1,args.n)) info = 12;
+    if (args.ldb < MAX(1,nrowa)) info = 9;
+    if (args.lda < MAX(1,nrowa)) info = 7;
+    if (args.k < 0) info = 4;
+    if (args.n < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) { /* info is -1 only when a recognised order passed every check */
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+  /* Common tail for both entry points. */
+  if (args.n == 0) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  buffer = (FLOAT *)blas_memory_alloc(0);
+  /* sa and sb are two regions inside the single buffer allocated above. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+
+#ifdef SMP
+  if (!trans){
+    mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T);
+  } else {
+    mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N);
+  }
+
+  mode |= (uplo << BLAS_UPLO_SHIFT);
+
+  args.common = NULL;
+  args.nthreads = num_cpu_avail(3);
+  /* With one thread the kernel is called directly; otherwise it is handed to syrk_thread. */
+  if (args.nthreads == 1) {
+#endif
+
+    (syr2k[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
+
+#ifdef SMP
+
+  } else {
+
+    syrk_thread(mode, &args, NULL, NULL, syr2k[(uplo << 1) | trans ], sa, sb, args.nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, 2 * args.n * args.k + args.n * args.n, 2 * args.n * args.n * args.k);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/syrk.c b/interface/syrk.c
new file mode 100644
index 000000000..a0cc64180
--- /dev/null
+++ b/interface/syrk.c
@@ -0,0 +1,355 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifndef COMPLEX
+#ifdef XDOUBLE
+#define ERROR_NAME "QSYRK "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DSYRK "
+#else
+#define ERROR_NAME "SSYRK "
+#endif
+#else
+#ifndef HEMM
+#ifdef XDOUBLE
+#define ERROR_NAME "XSYRK "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZSYRK "
+#else
+#define ERROR_NAME "CSYRK "
+#endif
+#else
+#ifdef XDOUBLE
+#define ERROR_NAME "XHERK "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHERK "
+#else
+#define ERROR_NAME "CHERK "
+#endif
+#endif
+#endif
+/* Kernel table: entries 0-3 are selected with (uplo << 1) | trans, the *_THREAD_* entries with 4 | that index. */
+static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+#ifndef HEMM
+  SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
+#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
+  SYRK_THREAD_UN, SYRK_THREAD_UC, SYRK_THREAD_LN, SYRK_THREAD_LC,
+#endif
+#else
+  HERK_UN, HERK_UC, HERK_LN, HERK_LC,
+#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
+  HERK_THREAD_UN, HERK_THREAD_UC, HERK_THREAD_LN, HERK_THREAD_LC,
+#endif
+#endif
+};
+
+#ifndef CBLAS
+/* Fortran-style entry point: all arguments are passed by reference. */
+void NAME(char *UPLO, char *TRANS,
+          blasint *N, blasint *K,
+          FLOAT *alpha, FLOAT *a, blasint *ldA,
+          FLOAT *beta, FLOAT *c, blasint *ldC){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+
+  blas_arg_t args;
+
+  FLOAT *buffer;
+  FLOAT *sa, *sb;
+
+#ifdef SMP
+#ifndef COMPLEX
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_REAL;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_REAL;
+#else
+  int mode = BLAS_SINGLE | BLAS_REAL;
+#endif
+#else
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_COMPLEX;
+#else
+  int mode = BLAS_SINGLE | BLAS_COMPLEX;
+#endif
+#endif
+#endif
+
+  blasint info;
+  int uplo;
+  int trans;
+  int nrowa;
+
+  PRINT_DEBUG_NAME;
+
+  args.n = *N;
+  args.k = *K;
+
+  args.a = (void *)a;
+  args.c = (void *)c;
+
+  args.lda = *ldA;
+  args.ldc = *ldC;
+
+  args.alpha = (void *)alpha;
+  args.beta = (void *)beta;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+
+  uplo = -1;
+  trans = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;
+  if (trans_arg == 'C') trans = 1;
+
+  nrowa = args.n;
+  if (trans & 1) nrowa = args.k;
+
+  info = 0;
+  /* Later checks overwrite earlier ones, so info ends as the lowest-numbered bad argument. */
+  if (args.ldc < MAX(1,args.n)) info = 10;
+  if (args.lda < MAX(1,nrowa)) info = 7;
+  if (args.k < 0) info = 4;
+  if (args.n < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+/* CBLAS entry point: CblasRowMajor is served by the same kernels with uplo and trans swapped. */
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
+           blasint n, blasint k,
+#if !defined(COMPLEX) || defined(HEMM)
+           FLOAT alpha,
+#else
+           FLOAT *alpha,
+#endif
+           FLOAT *a, blasint lda,
+#if !defined(COMPLEX) || defined(HEMM)
+           FLOAT beta,
+#else
+           FLOAT *beta,
+#endif
+           FLOAT *c, blasint ldc) {
+
+  blas_arg_t args;
+  int uplo, trans;
+  blasint info, nrowa;
+
+  FLOAT *buffer;
+  FLOAT *sa, *sb;
+
+#ifdef SMP
+#ifndef COMPLEX
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_REAL;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_REAL;
+#else
+  int mode = BLAS_SINGLE | BLAS_REAL;
+#endif
+#else
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_COMPLEX;
+#else
+  int mode = BLAS_SINGLE | BLAS_COMPLEX;
+#endif
+#endif
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  args.n = n;
+  args.k = k;
+
+  args.a = (void *)a;
+  args.c = (void *)c;
+
+  args.lda = lda;
+  args.ldc = ldc;
+
+#if !defined(COMPLEX) || defined(HEMM)
+  args.alpha = (void *)&alpha;
+  args.beta = (void *)&beta;
+#else
+  args.alpha = (void *)alpha;
+  args.beta = (void *)beta;
+#endif
+
+  trans = -1;
+  uplo = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (Trans == CblasNoTrans) trans = 0;
+#ifndef COMPLEX
+    if (Trans == CblasTrans) trans = 1;
+    if (Trans == CblasConjNoTrans) trans = 0;
+    if (Trans == CblasConjTrans) trans = 1;
+#elif !defined(HEMM)
+    if (Trans == CblasTrans) trans = 1;
+#else
+    if (Trans == CblasConjTrans) trans = 1;
+#endif
+
+    info = -1;
+
+    nrowa = args.n;
+    if (trans & 1) nrowa = args.k;
+
+    if (args.ldc < MAX(1,args.n)) info = 10;
+    if (args.lda < MAX(1,nrowa)) info = 7;
+    if (args.k < 0) info = 4;
+    if (args.n < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (Trans == CblasNoTrans) trans = 1;
+#ifndef COMPLEX
+    if (Trans == CblasTrans) trans = 0;
+    if (Trans == CblasConjNoTrans) trans = 1;
+    if (Trans == CblasConjTrans) trans = 0;
+#elif !defined(HEMM)
+    if (Trans == CblasTrans) trans = 0;
+#else
+    if (Trans == CblasConjTrans) trans = 0;
+#endif
+
+    info = -1;
+
+    nrowa = args.n;
+    if (trans & 1) nrowa = args.k;
+
+    if (args.ldc < MAX(1,args.n)) info = 10;
+    if (args.lda < MAX(1,nrowa)) info = 7;
+    if (args.k < 0) info = 4;
+    if (args.n < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) { /* info is -1 only when a recognised order passed every check */
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+  /* Common tail for both entry points. */
+  if (args.n == 0) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  buffer = (FLOAT *)blas_memory_alloc(0);
+  /* sa and sb are two regions inside the single buffer allocated above. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+
+#ifdef SMP
+  if (!trans){
+    mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T);
+  } else {
+    mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N);
+  }
+
+  mode |= (uplo << BLAS_UPLO_SHIFT);
+
+  args.common = NULL;
+  args.nthreads = num_cpu_avail(3);
+  /* With one thread the plain kernel is called directly. */
+  if (args.nthreads == 1) {
+#endif
+
+    (syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
+
+#ifdef SMP
+
+  } else {
+
+#ifndef USE_SIMPLE_THREADED_LEVEL3
+    /* Entries 4..7 of the table are the *_THREAD_* kernels. */
+    (syrk[4 | (uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
+
+#else
+
+    syrk_thread(mode, &args, NULL, NULL, syrk[(uplo << 1) | trans ], sa, sb, args.nthreads);
+
+#endif
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.n * args.k + args.n * args.n / 2, args.n * args.n * args.k);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/tbmv.c b/interface/tbmv.c
new file mode 100644
index 000000000..cec2be465
--- /dev/null
+++ b/interface/tbmv.c
@@ -0,0 +1,248 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2.
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QTBMV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTBMV "
+#else
+#define ERROR_NAME "STBMV "
+#endif
+/* Kernel table; the shared tail below selects an entry with (trans<<2) | (uplo<<1) | unit. */
+static int (*tbmv[])(BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = {
+#ifdef XDOUBLE
+  qtbmv_NUU, qtbmv_NUN, qtbmv_NLU, qtbmv_NLN,
+  qtbmv_TUU, qtbmv_TUN, qtbmv_TLU, qtbmv_TLN,
+#elif defined(DOUBLE)
+  dtbmv_NUU, dtbmv_NUN, dtbmv_NLU, dtbmv_NLN,
+  dtbmv_TUU, dtbmv_TUN, dtbmv_TLU, dtbmv_TLN,
+#else
+  stbmv_NUU, stbmv_NUN, stbmv_NLU, stbmv_NLN,
+  stbmv_TUU, stbmv_TUN, stbmv_TLU, stbmv_TLN,
+#endif
+};
+
+#ifdef SMP /* same index layout as tbmv[], used when more than one thread is available */
+static int (*tbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  qtbmv_thread_NUU, qtbmv_thread_NUN, qtbmv_thread_NLU, qtbmv_thread_NLN,
+  qtbmv_thread_TUU, qtbmv_thread_TUN, qtbmv_thread_TLU, qtbmv_thread_TLN,
+#elif defined(DOUBLE)
+  dtbmv_thread_NUU, dtbmv_thread_NUN, dtbmv_thread_NLU, dtbmv_thread_NLN,
+  dtbmv_thread_TUU, dtbmv_thread_TUN, dtbmv_thread_TLU, dtbmv_thread_TLN,
+#else
+  stbmv_thread_NUU, stbmv_thread_NUN, stbmv_thread_NLU, stbmv_thread_NLN,
+  stbmv_thread_TUU, stbmv_thread_TUN, stbmv_thread_TLU, stbmv_thread_TLN,
+#endif
+};
+#endif
+
+#ifndef CBLAS
+/* Fortran-style entry point: all arguments are passed by reference. */
+void NAME(char *UPLO, char *TRANS, char *DIAG,
+          blasint *N, blasint *K,
+          FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blasint n = *N;
+  blasint k = *K;
+  blasint lda = *LDA;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  int unit;
+  int trans;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+  TOUPPER(diag_arg);
+
+  trans = -1;
+  unit = -1;
+  uplo = -1;
+
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;
+  if (trans_arg == 'C') trans = 1;
+
+  if (diag_arg == 'U') unit = 0;
+  if (diag_arg == 'N') unit = 1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+  /* Later checks overwrite earlier ones, so info ends as the lowest-numbered bad argument. */
+  if (incx == 0) info = 9;
+  if (lda < k + 1) info = 7;
+  if (k < 0) info = 5;
+  if (n < 0) info = 4;
+  if (unit < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+/* CBLAS entry point: CblasRowMajor is served by the same kernels with uplo and trans swapped. */
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
+           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
+           blasint n, blasint k, FLOAT *a, blasint lda, FLOAT *x, blasint incx) {
+
+  int trans, uplo, unit;
+  blasint info;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  unit = -1;
+  uplo = -1;
+  trans = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (TransA == CblasNoTrans) trans = 0;
+    if (TransA == CblasTrans) trans = 1;
+    if (TransA == CblasConjNoTrans) trans = 0;
+    if (TransA == CblasConjTrans) trans = 1;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 9;
+    if (lda < k + 1) info = 7;
+    if (k < 0) info = 5;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (TransA == CblasNoTrans) trans = 1;
+    if (TransA == CblasTrans) trans = 0;
+    if (TransA == CblasConjNoTrans) trans = 1;
+    if (TransA == CblasConjTrans) trans = 0;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 9;
+    if (lda < k + 1) info = 7;
+    if (k < 0) info = 5;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) { /* info is -1 only when a recognised order passed every check */
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+  /* Common tail for both entry points. */
+  if (n == 0) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0 ) x -= (n - 1) * incx; /* negative stride: advance x by (n - 1) * |incx| elements */
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+  /* With one thread the plain kernel is used, otherwise the matching tbmv_thread entry. */
+  if (nthreads == 1) {
+#endif
+
+    (tbmv[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer);
+
+#ifdef SMP
+  } else {
+
+    (tbmv_thread[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * k / 2 + n, n * k);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/tbsv.c b/interface/tbsv.c
new file mode 100644
index 000000000..a07c4c584
--- /dev/null
+++ b/interface/tbsv.c
@@ -0,0 +1,213 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QTBSV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTBSV "
+#else
+#define ERROR_NAME "STBSV "
+#endif
+/* Kernel table; the shared tail below selects an entry with (trans<<2) | (uplo<<1) | unit. */
+static int (*tbsv[])(BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = {
+#ifdef XDOUBLE
+  qtbsv_NUU, qtbsv_NUN, qtbsv_NLU, qtbsv_NLN,
+  qtbsv_TUU, qtbsv_TUN, qtbsv_TLU, qtbsv_TLN,
+#elif defined(DOUBLE)
+  dtbsv_NUU, dtbsv_NUN, dtbsv_NLU, dtbsv_NLN,
+  dtbsv_TUU, dtbsv_TUN, dtbsv_TLU, dtbsv_TLN,
+#else
+  stbsv_NUU, stbsv_NUN, stbsv_NLU, stbsv_NLN,
+  stbsv_TUU, stbsv_TUN, stbsv_TLU, stbsv_TLN,
+#endif
+};
+
+#ifndef CBLAS
+/* Fortran-style entry point: all arguments are passed by reference. */
+void NAME(char *UPLO, char *TRANS, char *DIAG,
+          blasint *N, blasint *K,
+          FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blasint n = *N;
+  blasint k = *K;
+  blasint lda = *LDA;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  int unit;
+  int trans;
+  FLOAT *buffer;
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+  TOUPPER(diag_arg);
+
+  trans = -1;
+  unit = -1;
+  uplo = -1;
+
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;
+  if (trans_arg == 'C') trans = 1;
+
+  if (diag_arg == 'U') unit = 0;
+  if (diag_arg == 'N') unit = 1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+  /* Later checks overwrite earlier ones, so info ends as the lowest-numbered bad argument. */
+  if (incx == 0) info = 9;
+  if (lda < k + 1) info = 7;
+  if (k < 0) info = 5;
+  if (n < 0) info = 4;
+  if (unit < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+/* CBLAS entry point: CblasRowMajor is served by the same kernels with uplo and trans swapped. */
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
+           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
+           blasint n, blasint k, FLOAT *a, blasint lda, FLOAT *x, blasint incx) {
+
+  int trans, uplo, unit;
+  blasint info;
+  FLOAT *buffer;
+
+  PRINT_DEBUG_CNAME;
+
+  unit = -1;
+  uplo = -1;
+  trans = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (TransA == CblasNoTrans) trans = 0;
+    if (TransA == CblasTrans) trans = 1;
+    if (TransA == CblasConjNoTrans) trans = 0;
+    if (TransA == CblasConjTrans) trans = 1;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 9;
+    if (lda < k + 1) info = 7;
+    if (k < 0) info = 5;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (TransA == CblasNoTrans) trans = 1;
+    if (TransA == CblasTrans) trans = 0;
+    if (TransA == CblasConjNoTrans) trans = 1;
+    if (TransA == CblasConjTrans) trans = 0;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 9;
+    if (lda < k + 1) info = 7;
+    if (k < 0) info = 5;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) { /* info is -1 only when a recognised order passed every check */
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+  /* Common tail for both entry points. */
+  if (n == 0) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0 ) x -= (n - 1) * incx; /* negative stride: advance x by (n - 1) * |incx| elements */
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  (tbsv[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer);
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * k / 2 + n, n * k);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/tpmv.c b/interface/tpmv.c
new file mode 100644
index 000000000..f0fc4f71c
--- /dev/null
+++ b/interface/tpmv.c
@@ -0,0 +1,239 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QTPMV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTPMV "
+#else
+#define ERROR_NAME "STPMV "
+#endif
+/* Kernel table; the shared tail below selects an entry with (trans<<2) | (uplo<<1) | unit. */
+static int (*tpmv[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, void *) = {
+#ifdef XDOUBLE
+  qtpmv_NUU, qtpmv_NUN, qtpmv_NLU, qtpmv_NLN,
+  qtpmv_TUU, qtpmv_TUN, qtpmv_TLU, qtpmv_TLN,
+#elif defined(DOUBLE)
+  dtpmv_NUU, dtpmv_NUN, dtpmv_NLU, dtpmv_NLN,
+  dtpmv_TUU, dtpmv_TUN, dtpmv_TLU, dtpmv_TLN,
+#else
+  stpmv_NUU, stpmv_NUN, stpmv_NLU, stpmv_NLN,
+  stpmv_TUU, stpmv_TUN, stpmv_TLU, stpmv_TLN,
+#endif
+};
+
+#ifdef SMP /* same index layout as tpmv[], used when more than one thread is available */
+static int (*tpmv_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  qtpmv_thread_NUU, qtpmv_thread_NUN, qtpmv_thread_NLU, qtpmv_thread_NLN,
+  qtpmv_thread_TUU, qtpmv_thread_TUN, qtpmv_thread_TLU, qtpmv_thread_TLN,
+#elif defined(DOUBLE)
+  dtpmv_thread_NUU, dtpmv_thread_NUN, dtpmv_thread_NLU, dtpmv_thread_NLN,
+  dtpmv_thread_TUU, dtpmv_thread_TUN, dtpmv_thread_TLU, dtpmv_thread_TLN,
+#else
+  stpmv_thread_NUU, stpmv_thread_NUN, stpmv_thread_NLU, stpmv_thread_NLN,
+  stpmv_thread_TUU, stpmv_thread_TUN, stpmv_thread_TLU, stpmv_thread_TLN,
+#endif
+};
+#endif
+
+#ifndef CBLAS
+/* Fortran-style entry point: all arguments are passed by reference. */
+void NAME(char *UPLO, char *TRANS, char *DIAG,
+          blasint *N, FLOAT *a, FLOAT *x, blasint *INCX){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blasint n = *N;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  int unit;
+  int trans;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+  TOUPPER(diag_arg);
+
+  trans = -1;
+  unit = -1;
+  uplo = -1;
+
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;
+  if (trans_arg == 'C') trans = 1;
+
+  if (diag_arg == 'U') unit = 0;
+  if (diag_arg == 'N') unit = 1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+  /* Later checks overwrite earlier ones, so info ends as the lowest-numbered bad argument. */
+  if (incx == 0) info = 7;
+  if (n < 0) info = 4;
+  if (unit < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+/* CBLAS entry point: CblasRowMajor is served by the same kernels with uplo and trans swapped. */
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
+           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
+           blasint n, FLOAT *a, FLOAT *x, blasint incx) {
+
+  int trans, uplo, unit;
+  blasint info;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  unit = -1;
+  uplo = -1;
+  trans = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (TransA == CblasNoTrans) trans = 0;
+    if (TransA == CblasTrans) trans = 1;
+    if (TransA == CblasConjNoTrans) trans = 0;
+    if (TransA == CblasConjTrans) trans = 1;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 7;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (TransA == CblasNoTrans) trans = 1;
+    if (TransA == CblasTrans) trans = 0;
+    if (TransA == CblasConjNoTrans) trans = 1;
+    if (TransA == CblasConjTrans) trans = 0;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 7;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) { /* info is -1 only when a recognised order passed every check */
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+  /* Common tail for both entry points. */
+  if (n == 0) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0 ) x -= (n - 1) * incx; /* negative stride: advance x by (n - 1) * |incx| elements */
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+  /* With one thread the plain kernel is used, otherwise the matching tpmv_thread entry. */
+  if (nthreads == 1) {
+#endif
+
+    (tpmv[(trans<<2) | (uplo<<1) | unit])(n, a, x, incx, buffer);
+
+#ifdef SMP
+  } else {
+
+    (tpmv_thread[(trans<<2) | (uplo<<1) | unit])(n, a, x, incx, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/tpsv.c b/interface/tpsv.c
new file mode 100644
index 000000000..9dafd0b68
--- /dev/null
+++ b/interface/tpsv.c
@@ -0,0 +1,204 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2.
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifdef XDOUBLE
+#define ERROR_NAME "QTPSV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTPSV "
+#else
+#define ERROR_NAME "STPSV "
+#endif
+/* Kernel table; the shared tail below selects an entry with (trans<<2) | (uplo<<1) | unit. */
+static int (*tpsv[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, void *) = {
+#ifdef XDOUBLE
+  qtpsv_NUU, qtpsv_NUN, qtpsv_NLU, qtpsv_NLN,
+  qtpsv_TUU, qtpsv_TUN, qtpsv_TLU, qtpsv_TLN,
+#elif defined(DOUBLE)
+  dtpsv_NUU, dtpsv_NUN, dtpsv_NLU, dtpsv_NLN,
+  dtpsv_TUU, dtpsv_TUN, dtpsv_TLU, dtpsv_TLN,
+#else
+  stpsv_NUU, stpsv_NUN, stpsv_NLU, stpsv_NLN,
+  stpsv_TUU, stpsv_TUN, stpsv_TLU, stpsv_TLN,
+#endif
+};
+
+#ifndef CBLAS
+/* Fortran-style entry point: all arguments are passed by reference. */
+void NAME(char *UPLO, char *TRANS, char *DIAG,
+          blasint *N, FLOAT *a, FLOAT *x, blasint *INCX){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blasint n = *N;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  int unit;
+  int trans;
+  FLOAT *buffer;
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+  TOUPPER(diag_arg);
+
+  trans = -1;
+  unit = -1;
+  uplo = -1;
+
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;
+  if (trans_arg == 'C') trans = 1;
+
+  if (diag_arg == 'U') unit = 0;
+  if (diag_arg == 'N') unit = 1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+  /* Later checks overwrite earlier ones, so info ends as the lowest-numbered bad argument. */
+  if (incx == 0) info = 7;
+  if (n < 0) info = 4;
+  if (unit < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+/* CBLAS entry point: CblasRowMajor is served by the same kernels with uplo and trans swapped. */
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
+           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
+           blasint n, FLOAT *a, FLOAT *x, blasint incx) {
+
+  int trans, uplo, unit;
+  blasint info;
+  FLOAT *buffer;
+
+  PRINT_DEBUG_CNAME;
+
+  unit = -1;
+  uplo = -1;
+  trans = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (TransA == CblasNoTrans) trans = 0;
+    if (TransA == CblasTrans) trans = 1;
+    if (TransA == CblasConjNoTrans) trans = 0;
+    if (TransA == CblasConjTrans) trans = 1;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 7;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (TransA == CblasNoTrans) trans = 1;
+    if (TransA == CblasTrans) trans = 0;
+    if (TransA == CblasConjNoTrans) trans = 1;
+    if (TransA == CblasConjTrans) trans = 0;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 7;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) { /* info is -1 only when a recognised order passed every check */
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+  /* Common tail for both entry points. */
+  if (n == 0) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  if (incx < 0 ) x -= (n - 1) * incx; /* negative stride: advance x by (n - 1) * |incx| elements */
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  (tpsv[(trans<<2) | (uplo<<1) | unit])(n, a, x, incx, buffer);
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/trmv.c b/interface/trmv.c
new file mode 100644
index 000000000..ed23cedc6
--- /dev/null
+++ b/interface/trmv.c
@@ -0,0 +1,243 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1.
Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name passed to xerbla when an argument check fails. */
+#ifdef XDOUBLE
+#define ERROR_NAME "QTRMV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTRMV "
+#else
+#define ERROR_NAME "STRMV "
+#endif
+
+/* Single-threaded kernels, indexed by (trans << 2) | (uplo << 1) | unit. */
+static int (*trmv[])(BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
+#ifdef XDOUBLE
+  qtrmv_NUU, qtrmv_NUN, qtrmv_NLU, qtrmv_NLN,
+  qtrmv_TUU, qtrmv_TUN, qtrmv_TLU, qtrmv_TLN,
+#elif defined(DOUBLE)
+  dtrmv_NUU, dtrmv_NUN, dtrmv_NLU, dtrmv_NLN,
+  dtrmv_TUU, dtrmv_TUN, dtrmv_TLU, dtrmv_TLN,
+#else
+  strmv_NUU, strmv_NUN, strmv_NLU, strmv_NLN,
+  strmv_TUU, strmv_TUN, strmv_TLU, strmv_TLN,
+#endif
+};
+
+#ifdef SMP
+/* Threaded kernels: same index layout, plus a trailing thread count. */
+static int (*trmv_thread[])(BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  qtrmv_thread_NUU, qtrmv_thread_NUN, qtrmv_thread_NLU, qtrmv_thread_NLN,
+  qtrmv_thread_TUU, qtrmv_thread_TUN, qtrmv_thread_TLU, qtrmv_thread_TLN,
+#elif defined(DOUBLE)
+  dtrmv_thread_NUU, dtrmv_thread_NUN, dtrmv_thread_NLU, dtrmv_thread_NLN,
+  dtrmv_thread_TUU, dtrmv_thread_TUN, dtrmv_thread_TLU, dtrmv_thread_TLN,
+#else
+  strmv_thread_NUU, strmv_thread_NUN, strmv_thread_NLU, strmv_thread_NLN,
+  strmv_thread_TUU, strmv_thread_TUN, strmv_thread_TLU, strmv_thread_TLN,
+#endif
+};
+#endif
+
+#ifndef CBLAS
+
+/* Fortran-style entry point: every argument arrives by reference.       */
+/* Flags are upper-cased and mapped to table indices; a flag that        */
+/* matches nothing stays at -1 and is reported through xerbla.           */
+void NAME(char *UPLO, char *TRANS, char *DIAG,
+          blasint *N, FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blasint n = *N;
+  blasint lda = *LDA;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  int unit;
+  int trans;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+  TOUPPER(diag_arg);
+
+  trans = -1;
+  unit = -1;
+  uplo = -1;
+
+  /* 'R' is treated like 'N' and 'C' like 'T' in this real-valued file. */
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;
+  if (trans_arg == 'C') trans = 1;
+
+  if (diag_arg == 'U') unit = 0;
+  if (diag_arg == 'N') unit = 1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  /* Checked from the last argument to the first, so the lowest-numbered */
+  /* offending argument is the one left in info.                         */
+  if (incx == 0) info = 8;
+  if (lda < MAX(1, n)) info = 6;
+  if (n < 0) info = 4;
+  if (unit < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+/* CBLAS entry point: arguments arrive by value, flags as enums.         */
+/* Row-major input is handled by flipping both uplo and trans before     */
+/* dispatching to the same kernels as the column-major case.             */
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
+           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
+           blasint n, FLOAT *a, blasint lda, FLOAT *x, blasint incx) {
+
+  int trans, uplo, unit;
+  blasint info;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  unit = -1;
+  uplo = -1;
+  trans = -1;
+  /* Stays 0 if order matches neither branch below; 0 also satisfies the */
+  /* info >= 0 test, so an unknown order takes the error path.           */
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (TransA == CblasNoTrans) trans = 0;
+    if (TransA == CblasTrans) trans = 1;
+    if (TransA == CblasConjNoTrans) trans = 0;
+    if (TransA == CblasConjTrans) trans = 1;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    /* -1 means "no error" in the CBLAS path. */
+    info = -1;
+
+    if (incx == 0) info = 8;
+    if (lda < MAX(1, n)) info = 6;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (TransA == CblasNoTrans) trans = 1;
+    if (TransA == CblasTrans) trans = 0;
+    if (TransA == CblasConjNoTrans) trans = 1;
+    if (TransA == CblasConjTrans) trans = 0;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 8;
+    if (lda < MAX(1, n)) info = 6;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* Shared tail of both entry points. */
+  if (n == 0) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative stride, move x by (n - 1) * |incx| elements so the   */
+  /* kernel receives the far end of the vector.                          */
+  if (incx < 0 ) x -= (n - 1) * incx;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  /* One available thread: use the plain kernel; otherwise the threaded one. */
+  if (nthreads == 1) {
+#endif
+
+  (trmv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer);
+
+#ifdef SMP
+  } else {
+
+    (trmv_thread[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/trsm.c b/interface/trsm.c
new file mode 100644
index 000000000..5836ce2f0
--- /dev/null
+++ b/interface/trsm.c
@@ -0,0 +1,391 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name passed to xerbla; this one source file is compiled as    */
+/* TRSM or TRMM (TRMM macro) in real or complex (COMPLEX macro) form.    */
+#ifndef TRMM
+#ifndef COMPLEX
+#ifdef XDOUBLE
+#define ERROR_NAME "QTRSM "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTRSM "
+#else
+#define ERROR_NAME "STRSM "
+#endif
+#else
+#ifdef XDOUBLE
+#define ERROR_NAME "XTRSM "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZTRSM "
+#else
+#define ERROR_NAME "CTRSM "
+#endif
+#endif
+#else
+#ifndef COMPLEX
+#ifdef XDOUBLE
+#define ERROR_NAME "QTRMM "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTRMM "
+#else
+#define ERROR_NAME "STRMM "
+#endif
+#else
+#ifdef XDOUBLE
+#define ERROR_NAME "XTRMM "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZTRMM "
+#else
+#define ERROR_NAME "CTRMM "
+#endif
+#endif
+#endif
+
+/* Kernel dispatch table with 32 entries, indexed by                     */
+/* (side << 4) | (trans << 2) | (uplo << 1) | unit.                      */
+/* Entry suffixes read as <L|R><N|T|R|C><U|L><U|N>.                      */
+static int (*trsm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+#ifndef TRMM
+  TRSM_LNUU, TRSM_LNUN, TRSM_LNLU, TRSM_LNLN,
+  TRSM_LTUU, TRSM_LTUN, TRSM_LTLU, TRSM_LTLN,
+  TRSM_LRUU, TRSM_LRUN, TRSM_LRLU, TRSM_LRLN,
+  TRSM_LCUU, TRSM_LCUN, TRSM_LCLU, TRSM_LCLN,
+  TRSM_RNUU, TRSM_RNUN, TRSM_RNLU, TRSM_RNLN,
+  TRSM_RTUU, TRSM_RTUN, TRSM_RTLU, TRSM_RTLN,
+  TRSM_RRUU, TRSM_RRUN, TRSM_RRLU, TRSM_RRLN,
+  TRSM_RCUU, TRSM_RCUN, TRSM_RCLU, TRSM_RCLN,
+#else
+  TRMM_LNUU, TRMM_LNUN, TRMM_LNLU, TRMM_LNLN,
+  TRMM_LTUU, TRMM_LTUN, TRMM_LTLU, TRMM_LTLN,
+  TRMM_LRUU, TRMM_LRUN, TRMM_LRLU, TRMM_LRLN,
+  TRMM_LCUU, TRMM_LCUN, TRMM_LCLU, TRMM_LCLN,
+  TRMM_RNUU, TRMM_RNUN, TRMM_RNLU, TRMM_RNLN,
+  TRMM_RTUU, TRMM_RTUN, TRMM_RTLU, TRMM_RTLN,
+  TRMM_RRUU, TRMM_RRUN, TRMM_RRLU, TRMM_RRLN,
+  TRMM_RCUU, TRMM_RCUN, TRMM_RCLU, TRMM_RCLN,
+#endif
+};
+
+#ifndef CBLAS
+
+/* Fortran-style entry point: every argument arrives by reference and    */
+/* is packed into a blas_arg_t. alpha is carried in the args.beta slot.  */
+void NAME(char *SIDE, char *UPLO, char *TRANS, char *DIAG,
+          blasint *M, blasint *N, FLOAT *alpha,
+          FLOAT *a, blasint *ldA, FLOAT *b, blasint *ldB){
+
+  char side_arg = *SIDE;
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blas_arg_t args;
+
+  FLOAT *buffer;
+  FLOAT *sa, *sb;
+
+#ifdef SMP
+#ifndef COMPLEX
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_REAL;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_REAL;
+#else
+  int mode = BLAS_SINGLE | BLAS_REAL;
+#endif
+#else
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_COMPLEX;
+#else
+  int mode = BLAS_SINGLE | BLAS_COMPLEX;
+#endif
+#endif
+#endif
+
+  blasint info;
+  int side;
+  int uplo;
+  int unit;
+  int trans;
+  int nrowa;
+
+  PRINT_DEBUG_NAME;
+
+  args.m = *M;
+  args.n = *N;
+
+  args.a = (void *)a;
+  args.b = (void *)b;
+
+  args.lda = *ldA;
+  args.ldb = *ldB;
+
+  args.beta = (void *)alpha;
+
+  TOUPPER(side_arg);
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+  TOUPPER(diag_arg);
+
+  side = -1;
+  trans = -1;
+  unit = -1;
+  uplo = -1;
+
+  if (side_arg == 'L') side = 0;
+  if (side_arg == 'R') side = 1;
+
+  /* Unlike the level-2 routines, all four trans values stay distinct. */
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 2;
+  if (trans_arg == 'C') trans = 3;
+
+  if (diag_arg == 'U') unit = 0;
+  if (diag_arg == 'N') unit = 1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  /* Row count of A follows the side: m on the left, n on the right. */
+  nrowa = args.m;
+  if (side & 1) nrowa = args.n;
+
+  info = 0;
+
+  /* Checked from the last argument to the first, so the lowest-numbered */
+  /* offending argument is the one left in info.                         */
+  if (args.ldb < MAX(1,args.m)) info = 11;
+  if (args.lda < MAX(1,nrowa)) info = 9;
+  if (args.n < 0) info = 6;
+  if (args.m < 0) info = 5;
+  if (unit < 0) info = 4;
+  if (trans < 0) info = 3;
+  if (uplo < 0) info = 2;
+  if (side < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+/* CBLAS entry point. alpha arrives by value in the real build and by    */
+/* pointer in the complex build. Row-major input swaps m with n and      */
+/* flips side and uplo; trans is mapped the same way in both orders.     */
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
+           enum CBLAS_TRANSPOSE Trans, enum CBLAS_DIAG Diag,
+           blasint m, blasint n,
+#ifndef COMPLEX
+           FLOAT alpha,
+#else
+           FLOAT *alpha,
+#endif
+           FLOAT *a, blasint lda,
+           FLOAT *b, blasint ldb) {
+
+  blas_arg_t args;
+  int side, uplo, trans, unit;
+  blasint info, nrowa;
+
+  XFLOAT *buffer;
+  XFLOAT *sa, *sb;
+
+#ifdef SMP
+#ifndef COMPLEX
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_REAL;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_REAL;
+#else
+  int mode = BLAS_SINGLE | BLAS_REAL;
+#endif
+#else
+#ifdef XDOUBLE
+  int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
+#elif defined(DOUBLE)
+  int mode = BLAS_DOUBLE | BLAS_COMPLEX;
+#else
+  int mode = BLAS_SINGLE | BLAS_COMPLEX;
+#endif
+#endif
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  args.a = (void *)a;
+  args.b = (void *)b;
+
+  args.lda = lda;
+  args.ldb = ldb;
+
+  /* Real build: alpha is a by-value parameter, so pass its address. */
+#ifndef COMPLEX
+  args.beta = (void *)&alpha;
+#else
+  args.beta = (void *)alpha;
+#endif
+
+  side = -1;
+  uplo = -1;
+  trans = -1;
+  unit = -1;
+  /* Stays 0 if order matches neither branch below; 0 also satisfies the */
+  /* info >= 0 test, so an unknown order takes the error path.           */
+  info = 0;
+
+  if (order == CblasColMajor) {
+    args.m = m;
+    args.n = n;
+
+    if (Side == CblasLeft) side = 0;
+    if (Side == CblasRight) side = 1;
+
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (Trans == CblasNoTrans) trans = 0;
+    if (Trans == CblasTrans) trans = 1;
+#ifndef COMPLEX
+    if (Trans == CblasConjNoTrans) trans = 0;
+    if (Trans == CblasConjTrans) trans = 1;
+#else
+    if (Trans == CblasConjNoTrans) trans = 2;
+    if (Trans == CblasConjTrans) trans = 3;
+#endif
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    /* -1 means "no error" in the CBLAS path. */
+    info = -1;
+
+    nrowa = args.m;
+    if (side & 1) nrowa = args.n;
+
+    if (args.ldb < MAX(1,args.m)) info = 11;
+    if (args.lda < MAX(1,nrowa)) info = 9;
+    if (args.n < 0) info = 6;
+    if (args.m < 0) info = 5;
+    if (unit < 0) info = 4;
+    if (trans < 0) info = 3;
+    if (uplo < 0) info = 2;
+    if (side < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    args.m = n;
+    args.n = m;
+
+    if (Side == CblasLeft) side = 1;
+    if (Side == CblasRight) side = 0;
+
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (Trans == CblasNoTrans) trans = 0;
+    if (Trans == CblasTrans) trans = 1;
+#ifndef COMPLEX
+    if (Trans == CblasConjNoTrans) trans = 0;
+    if (Trans == CblasConjTrans) trans = 1;
+#else
+    if (Trans == CblasConjNoTrans) trans = 2;
+    if (Trans == CblasConjTrans) trans = 3;
+#endif
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    nrowa = args.m;
+    if (side & 1) nrowa = args.n;
+
+    if (args.ldb < MAX(1,args.m)) info = 11;
+    if (args.lda < MAX(1,nrowa)) info = 9;
+    if (args.n < 0) info = 6;
+    if (args.m < 0) info = 5;
+    if (unit < 0) info = 4;
+    if (trans < 0) info = 3;
+    if (uplo < 0) info = 2;
+    if (side < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* Shared tail of both entry points. */
+  if ((args.m == 0) || (args.n == 0)) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  buffer = (FLOAT *)blas_memory_alloc(0);
+
+  /* sa and sb are two regions carved out of the single allocation. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+
+#ifdef SMP
+  mode |= (trans << BLAS_TRANSA_SHIFT);
+  mode |= (side << BLAS_RSIDE_SHIFT);
+
+  args.nthreads = num_cpu_avail(3);
+
+  if (args.nthreads == 1) {
+#endif
+
+    (trsm[(side<<4) | (trans<<2) | (uplo<<1) | unit])(&args, NULL, NULL, sa, sb, 0);
+
+#ifdef SMP
+  } else {
+    /* Left side goes through gemm_thread_n, right side through gemm_thread_m. */
+    if (!side) {
+      gemm_thread_n(mode, &args, NULL, NULL, trsm[(side<<4) | (trans<<2) | (uplo<<1) | unit], sa, sb, args.nthreads);
+    } else {
+      gemm_thread_m(mode, &args, NULL, NULL, trsm[(side<<4) | (trans<<2) | (uplo<<1) | unit], sa, sb, args.nthreads);
+    }
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
+                       (!side) ? args.m * (args.m + args.n) : args.n * (args.m + args.n),
+                       (!side) ? args.m * args.m * args.n : args.m * args.n * args.n);
+
+  IDEBUG_END;
+
+  return;
+}
+
diff --git a/interface/trsv.c b/interface/trsv.c
new file mode 100644
index 000000000..8ef6998db
--- /dev/null
+++ b/interface/trsv.c
@@ -0,0 +1,208 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name passed to xerbla when an argument check fails. */
+#ifdef XDOUBLE
+#define ERROR_NAME "QTRSV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTRSV "
+#else
+#define ERROR_NAME "STRSV "
+#endif
+
+/* Kernel dispatch table, indexed by (trans << 2) | (uplo << 1) | unit.  */
+/* Entry suffixes read as <N|T><U|L><U|N>, matching that bit layout.     */
+static int (*trsv[])(BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = {
+#ifdef XDOUBLE
+  qtrsv_NUU, qtrsv_NUN, qtrsv_NLU, qtrsv_NLN,
+  qtrsv_TUU, qtrsv_TUN, qtrsv_TLU, qtrsv_TLN,
+#elif defined(DOUBLE)
+  dtrsv_NUU, dtrsv_NUN, dtrsv_NLU, dtrsv_NLN,
+  dtrsv_TUU, dtrsv_TUN, dtrsv_TLU, dtrsv_TLN,
+#else
+  strsv_NUU, strsv_NUN, strsv_NLU, strsv_NLN,
+  strsv_TUU, strsv_TUN, strsv_TLU, strsv_TLN,
+#endif
+};
+
+#ifndef CBLAS
+
+/* Fortran-style entry point: every argument arrives by reference.       */
+/* Flags are upper-cased and mapped to table indices; a flag that        */
+/* matches nothing stays at -1 and is reported through xerbla.           */
+void NAME(char *UPLO, char *TRANS, char *DIAG,
+          blasint *N, FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){
+
+  char uplo_arg = *UPLO;
+  char trans_arg = *TRANS;
+  char diag_arg = *DIAG;
+
+  blasint n = *N;
+  blasint lda = *LDA;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  int unit;
+  int trans;
+  FLOAT *buffer;
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(trans_arg);
+  TOUPPER(diag_arg);
+
+  trans = -1;
+  unit = -1;
+  uplo = -1;
+
+  /* 'R' is treated like 'N' and 'C' like 'T' in this real-valued file. */
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 0;
+  if (trans_arg == 'C') trans = 1;
+
+  if (diag_arg == 'U') unit = 0;
+  if (diag_arg == 'N') unit = 1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  /* Checked from the last argument to the first, so the lowest-numbered */
+  /* offending argument is the one left in info.                         */
+  if (incx == 0) info = 8;
+  if (lda < MAX(1, n)) info = 6;
+  if (n < 0) info = 4;
+  if (unit < 0) info = 3;
+  if (trans < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+/* CBLAS entry point: arguments arrive by value, flags as enums.         */
+/* Row-major input is handled by flipping both uplo and trans before     */
+/* dispatching to the same kernels as the column-major case.             */
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
+           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
+           blasint n, FLOAT *a, blasint lda, FLOAT *x, blasint incx) {
+
+  int trans, uplo, unit;
+  blasint info;
+  FLOAT *buffer;
+
+  PRINT_DEBUG_CNAME;
+
+  unit = -1;
+  uplo = -1;
+  trans = -1;
+  /* Stays 0 if order matches neither branch below; 0 also satisfies the */
+  /* info >= 0 test, so an unknown order takes the error path.           */
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    if (TransA == CblasNoTrans) trans = 0;
+    if (TransA == CblasTrans) trans = 1;
+    if (TransA == CblasConjNoTrans) trans = 0;
+    if (TransA == CblasConjTrans) trans = 1;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    /* -1 means "no error" in the CBLAS path. */
+    info = -1;
+
+    if (incx == 0) info = 8;
+    if (lda < MAX(1, n)) info = 6;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    if (Uplo == CblasUpper) uplo = 1;
+    if (Uplo == CblasLower) uplo = 0;
+
+    if (TransA == CblasNoTrans) trans = 1;
+    if (TransA == CblasTrans) trans = 0;
+    if (TransA == CblasConjNoTrans) trans = 1;
+    if (TransA == CblasConjTrans) trans = 0;
+
+    if (Diag == CblasUnit) unit = 0;
+    if (Diag == CblasNonUnit) unit = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 8;
+    if (lda < MAX(1, n)) info = 6;
+    if (n < 0) info = 4;
+    if (unit < 0) info = 3;
+    if (trans < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* Shared tail of both entry points. */
+  if (n == 0) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative stride, move x by (n - 1) * |incx| elements so the   */
+  /* kernel receives the far end of the vector.                          */
+  if (incx < 0 ) x -= (n - 1) * incx;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  (trsv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer);
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/trti2.c b/interface/trti2.c
new file mode 100644
index 000000000..e119b45af
--- /dev/null
+++ b/interface/trti2.c
@@ -0,0 +1,134 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name passed to xerbla when an argument check fails. */
+#ifdef XDOUBLE
+#define ERROR_NAME "QTRTI2"
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTRTI2"
+#else
+#define ERROR_NAME "STRTI2"
+#endif
+
+/* Kernel dispatch table, indexed by (uplo << 1) | diag. */
+static blasint (*trti2[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+#ifdef XDOUBLE
+  qtrti2_UU, qtrti2_UN, qtrti2_LU, qtrti2_LN,
+#elif defined(DOUBLE)
+  dtrti2_UU, dtrti2_UN, dtrti2_LU, dtrti2_LN,
+#else
+  strti2_UU, strti2_UN, strti2_LU, strti2_LN,
+#endif
+  };
+
+/* Fortran-style entry point. Always returns 0; the outcome is reported  */
+/* through *Info: minus the argument number on a bad argument, otherwise */
+/* the value returned by the selected kernel.                            */
+int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
+
+  blas_arg_t args;
+
+  blasint uplo_arg = *UPLO;
+  blasint diag_arg = *DIAG;
+  blasint uplo, diag;
+  blasint info;
+  FLOAT *buffer;
+  /* On PPC440 sa and sb are extern and nothing is allocated here. */
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  args.n = *N;
+  args.a = (void *)a;
+  args.lda = *ldA;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(diag_arg);
+
+  uplo = -1;
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+  diag = -1;
+  if (diag_arg == 'U') diag = 0;
+  if (diag_arg == 'N') diag = 1;
+
+  /* Checked from the last argument to the first, so the lowest-numbered */
+  /* offending argument is the one left in info.                         */
+  info = 0;
+  if (args.lda < MAX(1,args.n)) info = 5;
+  if (args.n < 0) info = 3;
+  if (diag < 0) info = 2;
+  if (uplo < 0) info = 1;
+  if (info) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    *Info = - info;
+    return 0;
+  }
+
+  *Info = 0;
+
+  if (args.n <= 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  /* sa and sb are two regions carved out of the single allocation. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+  info = (trti2[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
+
+  *Info = info;
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, .5 * args.n * args.n,
+                       args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
+                       + args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.)));
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/trtri.c b/interface/trtri.c
new file mode 100644
index 000000000..9e31905df
--- /dev/null
+++ b/interface/trtri.c
@@ -0,0 +1,153 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution.
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name passed to xerbla when an argument check fails. */
+#ifdef XDOUBLE
+#define ERROR_NAME "QTRTRI"
+#elif defined(DOUBLE)
+#define ERROR_NAME "DTRTRI"
+#else
+#define ERROR_NAME "STRTRI"
+#endif
+
+/* Single-threaded drivers, indexed by (uplo << 1) | diag. */
+static blasint (*trtri_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
+  TRTRI_UU_SINGLE, TRTRI_UN_SINGLE, TRTRI_LU_SINGLE, TRTRI_LN_SINGLE,
+};
+
+#ifdef SMP
+/* Threaded drivers with the same index layout. */
+static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
+  TRTRI_UU_PARALLEL, TRTRI_UN_PARALLEL, TRTRI_LU_PARALLEL, TRTRI_LN_PARALLEL,
+};
+#endif
+
+/* Fortran-style entry point. Always returns 0; the outcome is reported  */
+/* through *Info: minus the argument number on a bad argument, the       */
+/* IAMIN_K result when the non-unit pre-check finds a zero, otherwise    */
+/* the value returned by the selected driver.                            */
+int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
+
+  blas_arg_t args;
+
+  blasint uplo_arg = *UPLO;
+  blasint diag_arg = *DIAG;
+  blasint uplo, diag;
+  blasint info;
+  FLOAT *buffer;
+  /* On PPC440 sa and sb are extern and nothing is allocated here. */
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  args.n = *N;
+  args.a = (void *)a;
+  args.lda = *ldA;
+
+  TOUPPER(uplo_arg);
+  TOUPPER(diag_arg);
+
+  uplo = -1;
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+  diag = -1;
+  if (diag_arg == 'U') diag = 0;
+  if (diag_arg == 'N') diag = 1;
+
+  /* Checked from the last argument to the first, so the lowest-numbered */
+  /* offending argument is the one left in info.                         */
+  info = 0;
+  if (args.lda < MAX(1,args.n)) info = 5;
+  if (args.n < 0) info = 3;
+  if (diag < 0) info = 2;
+  if (uplo < 0) info = 1;
+  if (info) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    *Info = - info;
+    return 0;
+  }
+
+  *Info = 0;
+
+  if (args.n == 0) return 0;
+
+  /* Non-unit case only (diag == 1): scan n entries of a at stride       */
+  /* lda + 1 and stop early if the smallest one is zero. The stride      */
+  /* presumably walks the main diagonal -- confirm against AMIN_K.       */
+  if (diag) {
+    if (AMIN_K(args.n, args.a, args.lda + 1) == ZERO) {
+      *Info = IAMIN_K(args.n, args.a, args.lda + 1);
+      return 0;
+    }
+  }
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  /* sa and sb are two regions carved out of the single allocation. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+#ifdef SMP
+  args.nthreads = num_cpu_avail(4);
+
+  /* One available thread: use the single driver; otherwise the parallel one. */
+  if (args.nthreads == 1) {
+#endif
+
+    *Info = (trtri_single[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
+
+#ifdef SMP
+  } else {
+
+    *Info = (trtri_parallel[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
+
+  }
+#endif
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, .5 * args.n * args.n,
+                       args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
+                       + args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.)));
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/zaxpy.c b/interface/zaxpy.c
new file mode 100644
index 000000000..d3355ea57
--- /dev/null
+++ b/interface/zaxpy.c
@@ -0,0 +1,122 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ + + blasint n = *N; + blasint incx = *INCX; + blasint incy = *INCY; + +#else + +void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ + +#endif + + FLOAT alpha_r = *(ALPHA + 0); + FLOAT alpha_i = *(ALPHA + 1); + +#ifdef SMP + int mode, nthreads; +#endif + +#ifndef CBLAS + PRINT_DEBUG_CNAME; +#else + PRINT_DEBUG_CNAME; +#endif + + if (n <= 0) return; + + if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx * 2; + if (incy < 0) y -= (n - 1) * incy * 2; + +#ifdef SMP + nthreads = num_cpu_avail(1); + + if (nthreads == 1) { +#endif + +#ifndef CONJ + AXPYU_K (n, 0, 0, alpha_r, alpha_i, x, incx, y, incy, NULL, 0); +#else + AXPYC_K(n, 0, 0, alpha_r, alpha_i, x, incx, y, incy, NULL, 0); +#endif + +#ifdef SMP + } else { + +#ifdef XDOUBLE + mode = 
BLAS_XDOUBLE | BLAS_COMPLEX; +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | BLAS_COMPLEX; +#else + mode = BLAS_SINGLE | BLAS_COMPLEX; +#endif + + blas_level1_thread(mode, n, 0, 0, ALPHA, x, incx, y, incy, NULL, 0, +#ifndef CONJ + (void *)AXPYU_K, +#else + (void *)AXPYC_K, +#endif + nthreads); + } +#endif + + FUNCTION_PROFILE_END(4, 2 * n, 2 * n); + + IDEBUG_END; + + return; + +} diff --git a/interface/zdot.c b/interface/zdot.c new file mode 100644 index 000000000..1380ce292 --- /dev/null +++ b/interface/zdot.c @@ -0,0 +1,202 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef RETURN_BY_STRUCT +#ifdef XDOUBLE +#define MYTYPE myxcomplex_t +#elif defined DOUBLE +#define MYTYPE myzcomplex_t +#else +#define MYTYPE myccomplex_t +#endif +#endif + +#ifndef CBLAS + +#ifdef RETURN_BY_STRUCT +MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { +#elif defined RETURN_BY_STACK +void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { +#else +FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { +#endif + + BLASLONG n = *N; + BLASLONG incx = *INCX; + BLASLONG incy = *INCY; +#ifndef RETURN_BY_STACK + FLOAT _Complex ret; +#endif +#ifdef RETURN_BY_STRUCT + MYTYPE myret; +#endif + + PRINT_DEBUG_NAME; + + if (n <= 0) { +#ifdef RETURN_BY_STRUCT + myret.r = 0.; + myret.i = 0.; + return myret; +#elif defined RETURN_BY_STACK + *result = ZERO; + return; +#else + return ZERO; +#endif + } + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= 
(n - 1) * incx * 2; + if (incy < 0) y -= (n - 1) * incy * 2; + +#ifdef RETURN_BY_STRUCT + +#ifndef CONJ + ret = DOTU_K(n, x, incx, y, incy); +#else + ret = DOTC_K(n, x, incx, y, incy); +#endif + + myret.r = CREAL ret; + myret.i = CIMAG ret; + + FUNCTION_PROFILE_END(4, 2 * n, 2 * n); + + IDEBUG_END; + + return myret; + +#elif defined RETURN_BY_STACK + +#ifndef CONJ + *result = DOTU_K(n, x, incx, y, incy); +#else + *result = DOTC_K(n, x, incx, y, incy); +#endif + + FUNCTION_PROFILE_END(4, 2 * n, 2 * n); + + IDEBUG_END; + +#else + +#ifndef CONJ + ret = DOTU_K(n, x, incx, y, incy); +#else + ret = DOTC_K(n, x, incx, y, incy); +#endif + + FUNCTION_PROFILE_END(4, 2 * n, 2 * n); + + IDEBUG_END; + + return ret; + +#endif + +} + +#else + +#ifdef FORCE_USE_STACK +void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){ +#else +FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ + + FLOAT _Complex ret; +#endif + + PRINT_DEBUG_CNAME; + + if (n <= 0) { +#ifdef FORCE_USE_STACK + *result = ZERO; + return; +#else + return ZERO; +#endif + } + + if (incx < 0) x -= (n - 1) * incx * 2; + if (incy < 0) y -= (n - 1) * incy * 2; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifdef FORCE_USE_STACK + +#ifndef CONJ + *result = DOTU_K(n, x, incx, y, incy); +#else + *result = DOTC_K(n, x, incx, y, incy); +#endif + + FUNCTION_PROFILE_END(4, 2 * n, 2 * n); + + IDEBUG_END; + +#else + +#ifndef CONJ + ret = DOTU_K(n, x, incx, y, incy); +#else + ret = DOTC_K(n, x, incx, y, incy); +#endif + + FUNCTION_PROFILE_END(4, 2 * n, 2 * n); + + IDEBUG_END; + + return ret; + +#endif + +} + +#endif diff --git a/interface/zgbmv.c b/interface/zgbmv.c new file mode 100644 index 000000000..ae1fd24bf --- /dev/null +++ b/interface/zgbmv.c @@ -0,0 +1,271 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. 
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XGBMV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZGBMV " +#else +#define ERROR_NAME "CGBMV " +#endif + +static void (*gbmv[])(BLASLONG, BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, + FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { +#ifdef XDOUBLE + xgbmv_n, xgbmv_t, xgbmv_r, xgbmv_c, + xgbmv_o, xgbmv_u, xgbmv_s, xgbmv_d, +#elif defined(DOUBLE) + zgbmv_n, zgbmv_t, zgbmv_r, zgbmv_c, + zgbmv_o, zgbmv_u, zgbmv_s, zgbmv_d, +#else + cgbmv_n, cgbmv_t, cgbmv_r, cgbmv_c, + cgbmv_o, cgbmv_u, cgbmv_s, cgbmv_d, +#endif +}; + +#ifdef SMP +static int (*gbmv_thread[])(BLASLONG, BLASLONG, BLASLONG, BLASLONG, FLOAT *, + FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + xgbmv_thread_n, xgbmv_thread_t, xgbmv_thread_r, xgbmv_thread_c, + xgbmv_thread_o, xgbmv_thread_u, xgbmv_thread_s, xgbmv_thread_d, +#elif defined(DOUBLE) + zgbmv_thread_n, zgbmv_thread_t, zgbmv_thread_r, zgbmv_thread_c, + zgbmv_thread_o, zgbmv_thread_u, zgbmv_thread_s, zgbmv_thread_d, +#else + cgbmv_thread_n, cgbmv_thread_t, cgbmv_thread_r, cgbmv_thread_c, + cgbmv_thread_o, cgbmv_thread_u, cgbmv_thread_s, cgbmv_thread_d, +#endif +}; +#endif + +#ifndef CBLAS + +void NAME(char *TRANS, blasint *M, blasint *N, + blasint *KU, blasint *KL, + FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *x, blasint *INCX, + FLOAT *BETA, FLOAT *y, blasint *INCY){ + + char trans = *TRANS; + blasint m = *M; + blasint n = *N; + blasint ku = *KU; + blasint kl = *KL; + blasint lda = *LDA; + blasint incx = *INCX; + blasint incy = *INCY; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + FLOAT beta_r = BETA[0]; + FLOAT beta_i = BETA[1]; + + blasint info; + blasint lenx, leny; + blasint i; + + PRINT_DEBUG_NAME; + + 
TOUPPER(trans); + + info = 0; + + i = -1; + + if (trans == 'N') i = 0; + if (trans == 'T') i = 1; + if (trans == 'R') i = 2; + if (trans == 'C') i = 3; + if (trans == 'O') i = 4; + if (trans == 'U') i = 5; + if (trans == 'S') i = 6; + if (trans == 'D') i = 7; + + if (incy == 0) info = 13; + if (incx == 0) info = 10; + if (lda < kl + ku + 1) info = 8; + if (kl < 0) info = 5; + if (ku < 0) info = 4; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (i < 0) info = 1; + + trans = i; + + if (info != 0){ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, + blasint m, blasint n, + blasint ku, blasint kl, + FLOAT *ALPHA, + FLOAT *a, blasint lda, + FLOAT *x, blasint incx, + FLOAT *BETA, + FLOAT *y, blasint incy){ + + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + FLOAT beta_r = BETA[0]; + FLOAT beta_i = BETA[1]; + + FLOAT *buffer; + blasint lenx, leny; + int trans; + blasint info, t; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_CNAME; + + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 2; + if (TransA == CblasConjTrans) trans = 3; + + info = -1; + + if (incy == 0) info = 13; + if (incx == 0) info = 10; + if (lda < kl + ku + 1) info = 8; + if (kl < 0) info = 5; + if (ku < 0) info = 4; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (trans < 0) info = 1; + } + + if (order == CblasRowMajor) { + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 3; + if (TransA == CblasConjTrans) trans = 2; + + info = -1; + + t = n; + n = m; + m = t; + + t = ku; + ku = kl; + kl = t; + + if (incy == 0) info = 13; + if (incx == 0) info = 10; + if (lda < kl + ku + 1) info = 8; + if (kl < 0) info = 5; + if (ku < 0) info = 4; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (trans < 
0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if ((m==0) || (n==0)) return; + + lenx = n; + leny = m; + if (trans & 1) lenx = m; + if (trans & 1) leny = n; + + if (beta_r != ONE || beta_i != ZERO) SCAL_K(leny, 0, 0, beta_r, beta_i, y, abs(incy), NULL, 0, NULL, 0); + + if (alpha_r == ZERO && alpha_i == ZERO) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (lenx - 1) * incx * 2; + if (incy < 0) y -= (leny - 1) * incy * 2; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (gbmv[(int)trans])(m, n, kl, ku, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer); + +#ifdef SMP + + } else { + + (gbmv_thread[(int)trans])(m, n, kl, ku, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, m * n / 2 + n, m * n); + + IDEBUG_END; + + return; +} diff --git a/interface/zgemv.c b/interface/zgemv.c new file mode 100644 index 000000000..fb4784202 --- /dev/null +++ b/interface/zgemv.c @@ -0,0 +1,259 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XGEMV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZGEMV " +#else +#define ERROR_NAME "CGEMV " +#endif + +#ifdef SMP +static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + xgemv_thread_n, xgemv_thread_t, xgemv_thread_r, xgemv_thread_c, xgemv_thread_o, xgemv_thread_u, xgemv_thread_s, xgemv_thread_d, +#elif defined DOUBLE + zgemv_thread_n, zgemv_thread_t, zgemv_thread_r, zgemv_thread_c, zgemv_thread_o, zgemv_thread_u, zgemv_thread_s, zgemv_thread_d, +#else + cgemv_thread_n, cgemv_thread_t, cgemv_thread_r, cgemv_thread_c, cgemv_thread_o, cgemv_thread_u, cgemv_thread_s, cgemv_thread_d, +#endif +}; +#endif + +#ifndef CBLAS + +void NAME(char *TRANS, blasint *M, blasint *N, + FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *x, blasint *INCX, + FLOAT *BETA, FLOAT *y, blasint *INCY){ + + char trans = *TRANS; + blasint m = *M; + blasint n = *N; + blasint lda = *LDA; + blasint incx = *INCX; + blasint incy = *INCY; + + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, + FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { + GEMV_N, GEMV_T, GEMV_R, GEMV_C, + GEMV_O, GEMV_U, GEMV_S, GEMV_D, + }; + + blasint info; + blasint lenx, leny; + blasint i; + + PRINT_DEBUG_NAME; + + FLOAT alpha_r = *(ALPHA + 0); + FLOAT alpha_i = *(ALPHA + 1); + + FLOAT beta_r = *(BETA + 0); + FLOAT beta_i = *(BETA + 1); + + TOUPPER(trans); + + info = 0; + + i = -1; + + if (trans == 'N') i = 0; + if (trans == 'T') i = 1; + if (trans == 'R') i = 2; + if (trans == 'C') i = 3; + if (trans == 'O') i = 4; + if (trans == 'U') i = 5; + if (trans == 'S') i = 6; + if (trans == 'D') i = 7; + + if (incy == 0) info = 11; + if (incx == 0) info 
= 8; + if (lda < MAX(1,m)) info = 6; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (i < 0) info = 1; + + trans = i; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, + blasint m, blasint n, + FLOAT *ALPHA, + FLOAT *a, blasint lda, + FLOAT *x, blasint incx, + FLOAT *BETA, + FLOAT *y, blasint incy){ + + FLOAT *buffer; + blasint lenx, leny; + int trans; + blasint info, t; +#ifdef SMP + int nthreads; +#endif + + int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, + FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { + GEMV_N, GEMV_T, GEMV_R, GEMV_C, + GEMV_O, GEMV_U, GEMV_S, GEMV_D, + }; + + PRINT_DEBUG_CNAME; + + FLOAT alpha_r = *(ALPHA + 0); + FLOAT alpha_i = *(ALPHA + 1); + + FLOAT beta_r = *(BETA + 0); + FLOAT beta_i = *(BETA + 1); + + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 2; + if (TransA == CblasConjTrans) trans = 3; + + info = -1; + + if (incy == 0) info = 11; + if (incx == 0) info = 8; + if (lda < MAX(1, m)) info = 6; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (trans < 0) info = 1; + + } + + if (order == CblasRowMajor) { + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 3; + if (TransA == CblasConjTrans) trans = 2; + + info = -1; + + t = n; + n = m; + m = t; + + if (incy == 0) info = 11; + if (incx == 0) info = 8; + if (lda < MAX(1, m)) info = 6; + if (n < 0) info = 3; + if (m < 0) info = 2; + if (trans < 0) info = 1; + + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + /* Quick return if possible. 
*/ + + if (m == 0 || n == 0) return; + + lenx = n; + leny = m; + + if (trans & 1) lenx = m; + if (trans & 1) leny = n; + + if (beta_r != ONE || beta_i != ZERO) SCAL_K(leny, 0, 0, beta_r, beta_i, y, abs(incy), NULL, 0, NULL, 0); + + if (alpha_r == ZERO && alpha_i == ZERO) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (lenx - 1) * incx * 2; + if (incy < 0) y -= (leny - 1) * incy * 2; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (gemv[(int)trans])(m, n, 0, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer); + +#ifdef SMP + + } else { + + (gemv_thread[(int)trans])(m, n, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n); + + IDEBUG_END; + + return; +} diff --git a/interface/zger.c b/interface/zger.c new file mode 100644 index 000000000..ad52f40bb --- /dev/null +++ b/interface/zger.c @@ -0,0 +1,249 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#ifndef CONJ +#define ERROR_NAME "XGERU " +#else +#define ERROR_NAME "XGERC " +#endif +#elif defined DOUBLE +#ifndef CONJ +#define ERROR_NAME "ZGERU " +#else +#define ERROR_NAME "ZGERC " +#endif +#else +#ifndef CONJ +#define ERROR_NAME "CGERU " +#else +#define ERROR_NAME "CGERC " +#endif +#endif + +#if defined XDOUBLE +#ifndef CONJ +#define GER GERU_K +#define GER_THREAD xger_thread_U +#else +#define GER GERC_K +#define GER_THREAD xger_thread_C +#define GERV GERV_K +#define GERV_THREAD xger_thread_V +#endif +#elif defined DOUBLE +#ifndef CONJ +#define GER GERU_K +#define GER_THREAD zger_thread_U +#else +#define GER GERC_K +#define GER_THREAD zger_thread_C +#define GERV GERV_K +#define GERV_THREAD zger_thread_V +#endif +#else +#ifndef CONJ +#define GER GERU_K +#define GER_THREAD cger_thread_U +#else +#define GER GERC_K +#define GER_THREAD cger_thread_C +#define GERV GERV_K +#define GERV_THREAD cger_thread_V +#endif +#endif + +#ifndef CBLAS + +void NAME(blasint *M, blasint *N, FLOAT *Alpha, + FLOAT *x, blasint *INCX, + FLOAT *y, blasint *INCY, + FLOAT *a, blasint *LDA){ + + blasint m = *M; + blasint n = *N; + FLOAT alpha_r = Alpha[0]; + FLOAT alpha_i = Alpha[1]; + blasint incx = *INCX; + blasint incy = *INCY; + blasint lda = *LDA; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + blasint info; + + PRINT_DEBUG_NAME; + + info = 0; + + if (lda < MAX(1,m)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + + if (info){ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, + blasint m, blasint n, + FLOAT *Alpha, + FLOAT *x, blasint incx, + FLOAT *y, blasint incy, + FLOAT *a, blasint lda) { + + FLOAT alpha_r = Alpha[0]; + FLOAT alpha_i = Alpha[1]; + + FLOAT 
*buffer; + blasint info, t; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_CNAME; + + info = 0; + + if (order == CblasColMajor) { + info = -1; + + if (lda < MAX(1,m)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + } + + if (order == CblasRowMajor) { + info = -1; + + t = n; + n = m; + m = t; + + t = incx; + incx = incy; + incy = t; + + buffer = x; + x = y; + y = buffer; + + if (lda < MAX(1,m)) info = 9; + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (m < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + /* Quick return if possible. */ + if (m == 0 || n == 0) return; + + if ((alpha_r == 0.) && (alpha_i == 0.)) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incy < 0) y -= (n - 1) * incy * 2; + if (incx < 0) x -= (m - 1) * incx * 2; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + +#if !defined(CBLAS) || !defined(CONJ) + GER(m, n, 0, alpha_r, alpha_i, x, incx, y, incy, a, lda, buffer); +#else + if (order == CblasColMajor) { + GER(m, n, 0, alpha_r, alpha_i, x, incx, y, incy, a, lda, buffer); + } else { + GERV(m, n, 0, alpha_r, alpha_i, x, incx, y, incy, a, lda, buffer); + } +#endif + +#ifdef SMP + + } else { + +#if !defined(CBLAS) || !defined(CONJ) + GER_THREAD(m, n, Alpha, x, incx, y, incy, a, lda, buffer, nthreads); +#else + if (order == CblasColMajor) { + GER_THREAD(m, n, Alpha, x, incx, y, incy, a, lda, buffer, nthreads); + } else { + GERV_THREAD(m, n, Alpha, x, incx, y, incy, a, lda, buffer, nthreads); + } +#endif + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n); + + IDEBUG_END; + + return; + +} diff --git a/interface/zgetf2.c b/interface/zgetf2.c new file mode 100644 index 000000000..950ef46e9 --- /dev/null +++ b/interface/zgetf2.c @@ -0,0 +1,109 @@ 
+/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XGETF2" +#elif defined(DOUBLE) +#define ERROR_NAME "ZGETF2" +#else +#define ERROR_NAME "CGETF2" +#endif + +int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint *Info){ + + blas_arg_t args; + + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.m = *M; + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + args.c = (void *)ipiv; + + info = 0; + if (args.lda < MAX(1,args.m)) info = 4; + if (args.n < 0) info = 2; + if (args.m < 0) info = 1; + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + if (args.m == 0 || args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + + info = GETF2(&args, NULL, NULL, sa, sb, 0); + + *Info = info; + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n); + + IDEBUG_END; + + return 0; +} diff --git a/interface/zgetrf.c b/interface/zgetrf.c new file mode 100644 index 000000000..9f041d9bd --- /dev/null +++ b/interface/zgetrf.c @@ -0,0 +1,122 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. 
Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name reported to xerbla; chosen by the precision macros. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XGETRF"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZGETRF"
+#else
+#define ERROR_NAME "CGETRF"
+#endif
+
+/*
+ * Fortran-callable front end for the GETRF kernels.
+ *
+ * Every scalar is received by reference.  M, N and ldA are checked; on a
+ * bad argument xerbla is called with its 1-based position and *Info is
+ * set to the negated position.  Otherwise *Info receives the return
+ * value of GETRF_SINGLE, or of GETRF_PARALLEL when the build defines SMP
+ * and num_cpu_avail(4) reports more than one thread.  The function's own
+ * return value is always 0.
+ *
+ * NOTE(review): presumably this is the LAPACK xGETRF interface (blocked
+ * LU factorization, pivots in ipiv); the kernels are not visible here.
+ */
+int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint *Info){
+
+  blas_arg_t args;
+  blasint info;
+  FLOAT *buffer;
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  /* Collect the by-reference arguments in the kernel descriptor. */
+  args.m   = *M;
+  args.n   = *N;
+  args.lda = *ldA;
+  args.a   = (void *)a;
+  args.c   = (void *)ipiv;
+
+  /* The lowest-numbered offending argument is the one reported. */
+  if      (args.m < 0)                info = 1;
+  else if (args.n < 0)                info = 2;
+  else if (args.lda < MAX(1,args.m))  info = 4;
+  else                                info = 0;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    *Info = - info;
+    return 0;
+  }
+
+  *Info = 0;
+
+  /* Nothing to factor when either dimension is zero. */
+  if (args.m == 0 || args.n == 0) {
+    return 0;
+  }
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  /* One allocation supplies both scratch areas: sa first, sb after it. */
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa
+                  + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN))
+                 + GEMM_OFFSET_B);
+#endif
+
+#ifdef SMP
+  args.common   = NULL;
+  args.nthreads = num_cpu_avail(4);
+
+  if (args.nthreads != 1) {
+
+    *Info = GETRF_PARALLEL(&args, NULL, NULL, sa, sb, 0);
+
+  } else
+#endif
+  {
+    /* Only path in non-SMP builds; single-thread path otherwise. */
+    *Info = GETRF_SINGLE(&args, NULL, NULL, sa, sb, 0);
+  }
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n);
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/zgetrs.c b/interface/zgetrs.c
new file mode 100644
index 000000000..81d50e34f
--- /dev/null
+++ b/interface/zgetrs.c
@@ -0,0 +1,153 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved.
*/ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name reported to xerbla; chosen by the precision macros. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XGETRS"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZGETRS"
+#else
+#define ERROR_NAME "CGETRS"
+#endif
+
+/* Kernels indexed by the decoded TRANS value: 0 = 'N', 1 = 'T', 2 = 'R', 3 = 'C'. */
+static blasint (*getrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
+  GETRS_N_SINGLE, GETRS_T_SINGLE, GETRS_R_SINGLE, GETRS_C_SINGLE,
+};
+
+#ifdef SMP
+/* Multi-threaded counterparts, same indexing. */
+static blasint (*getrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
+  GETRS_N_PARALLEL, GETRS_T_PARALLEL, GETRS_R_PARALLEL, GETRS_C_PARALLEL,
+};
+#endif
+
+/*
+ * Fortran-callable front end for the GETRS kernels.
+ *
+ * Every scalar is received by reference.  TRANS is upper-cased and
+ * decoded ('N', 'T', 'R', 'C'); N, NRHS, ldA and ldB are range-checked.
+ * On a bad argument xerbla is called with its 1-based position and
+ * *Info is set to the negated position, as the sibling GETF2/GETRF
+ * front ends do.  Otherwise *Info is set to 0 and the selected kernel
+ * is run (its return value is not propagated).  The function's own
+ * return value is always 0.
+ *
+ * NOTE(review): presumably this is the LAPACK xGETRS interface (solve
+ * using an existing LU factorization); the kernels are not visible here.
+ */
+int NAME(char *TRANS, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA,
+         blasint *ipiv, FLOAT *b, blasint *ldB, blasint *Info){
+
+  char trans_arg = *TRANS;
+
+  blas_arg_t args;
+
+  blasint info;
+  int trans;
+  FLOAT *buffer;
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  /* Collect the by-reference arguments in the kernel descriptor. */
+  args.m    = *N;
+  args.n    = *NRHS;
+  args.a    = (void *)a;
+  args.lda  = *ldA;
+  args.b    = (void *)b;
+  args.ldb  = *ldB;
+  args.c    = (void *)ipiv;
+
+  info = 0;
+
+  TOUPPER(trans_arg);
+  trans = -1;
+
+  if (trans_arg == 'N') trans = 0;
+  if (trans_arg == 'T') trans = 1;
+  if (trans_arg == 'R') trans = 2;
+  if (trans_arg == 'C') trans = 3;
+
+  /* Checked from last argument to first so the lowest position wins. */
+  if (args.ldb < MAX(1, args.m)) info = 8;
+  if (args.lda < MAX(1, args.m)) info = 5;
+  if (args.n < 0)                info = 3;
+  if (args.m < 0)                info = 2;
+  if (trans < 0)                 info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    /* Report the failure to the caller too; previously *Info was left
+       untouched on this path. */
+    *Info = - info;
+    return 0;
+  }
+
+  args.alpha = NULL;
+  args.beta  = NULL;
+
+  *Info = info;
+
+  /* Nothing to solve when the system or the right-hand side is empty. */
+  if (args.m == 0 || args.n == 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  /* One allocation supplies both scratch areas: sa first, sb after it. */
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+#ifdef SMP
+  args.nthreads = num_cpu_avail(4);
+
+  if (args.nthreads == 1) {
+#endif
+
+    (getrs_single[trans])(&args, NULL, NULL, sa, sb, 0);
+
+#ifdef SMP
+  } else {
+
+    (getrs_parallel[trans])(&args, NULL, NULL, sa, sb, 0);
+
+  }
+#endif
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n);
+
+  IDEBUG_END;
+
+  return 0;
+
+}
diff --git a/interface/zhbmv.c b/interface/zhbmv.c
new file mode 100644
index 000000000..c14ad9859
--- /dev/null
+++ b/interface/zhbmv.c
@@ -0,0 +1,223 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include
+#include
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name reported to xerbla; chosen by the precision macros. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XHBMV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHBMV "
+#else
+#define ERROR_NAME "CHBMV "
+#endif
+
+/* Single-threaded kernels, indexed by the decoded uplo value 0..3. */
+static int (*hbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = {
+#ifdef XDOUBLE
+  xhbmv_U, xhbmv_L, xhbmv_V, xhbmv_M,
+#elif defined(DOUBLE)
+  zhbmv_U, zhbmv_L, zhbmv_V, zhbmv_M,
+#else
+  chbmv_U, chbmv_L, chbmv_V, chbmv_M,
+#endif
+};
+
+#ifdef SMP
+/* Threaded kernels, same indexing; these take alpha by pointer. */
+static int (*hbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  xhbmv_thread_U, xhbmv_thread_L, xhbmv_thread_V, xhbmv_thread_M,
+#elif defined(DOUBLE)
+  zhbmv_thread_U, zhbmv_thread_L, zhbmv_thread_V, zhbmv_thread_M,
+#else
+  chbmv_thread_U, chbmv_thread_L, chbmv_thread_V, chbmv_thread_M,
+#endif
+};
+#endif
+
+/*
+ * HBMV front end.  Exactly one of two prologues is compiled: the
+ * Fortran-style NAME (arguments by reference) or, when CBLAS is defined,
+ * CNAME (arguments by value plus a storage order).  Each prologue
+ * validates its arguments and decodes `uplo` into a kernel index; both
+ * then fall into the shared body after the #endif.
+ *
+ * On a bad argument xerbla is called with the 1-based position and the
+ * routine returns without touching y.
+ */
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
+         FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY){
+
+  char    uplo_arg = *UPLO;
+  blasint n        = *N;
+  blasint k        = *K;
+  blasint lda      = *LDA;
+  blasint incx     = *INCX;
+  blasint incy     = *INCY;
+  FLOAT   alpha_r  = ALPHA[0];
+  FLOAT   alpha_i  = ALPHA[1];
+  FLOAT   beta_r   = BETA[0];
+  FLOAT   beta_i   = BETA[1];
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+
+  /* 'V' and 'M' select kernel slots 2 and 3 (the same slots the CBLAS
+     row-major path uses). */
+  switch (uplo_arg) {
+  case 'U': uplo =  0; break;
+  case 'L': uplo =  1; break;
+  case 'V': uplo =  2; break;
+  case 'M': uplo =  3; break;
+  default:  uplo = -1; break;
+  }
+
+  /* The lowest-numbered offending argument is the one reported. */
+  if      (uplo < 0)    info =  1;
+  else if (n < 0)       info =  2;
+  else if (k < 0)       info =  3;
+  else if (lda < k + 1) info =  6;
+  else if (incx == 0)   info =  8;
+  else if (incy == 0)   info = 11;
+  else                  info =  0;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_UPLO Uplo,
+           blasint n, blasint k,
+           FLOAT *ALPHA,
+           FLOAT *a, blasint lda,
+           FLOAT *x, blasint incx,
+           FLOAT *BETA,
+           FLOAT *y, blasint incy){
+
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  FLOAT beta_r  = BETA[0];
+  FLOAT beta_i  = BETA[1];
+  FLOAT *buffer;
+  int uplo;
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  /* info stays 0 (reported as an error below) when `order` is neither
+     column- nor row-major; -1 means "all arguments valid". */
+  uplo = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    uplo = (Uplo == CblasLower) ? 1 : ((Uplo == CblasUpper) ? 0 : -1);
+
+    if      (uplo < 0)    info =  1;
+    else if (n < 0)       info =  2;
+    else if (k < 0)       info =  3;
+    else if (lda < k + 1) info =  6;
+    else if (incx == 0)   info =  8;
+    else if (incy == 0)   info = 11;
+    else                  info = -1;
+  }
+
+  if (order == CblasRowMajor) {
+    /* Row-major storage maps Upper/Lower onto kernel slots 3 and 2. */
+    uplo = (Uplo == CblasLower) ? 2 : ((Uplo == CblasUpper) ? 3 : -1);
+
+    if      (uplo < 0)    info =  1;
+    else if (n < 0)       info =  2;
+    else if (k < 0)       info =  3;
+    else if (lda < k + 1) info =  6;
+    else if (incx == 0)   info =  8;
+    else if (incy == 0)   info = 11;
+    else                  info = -1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* ---- body shared by both entry points ---- */
+
+  if (n == 0) return;
+
+  /* Pre-scale y by beta through SCAL_K unless beta is exactly 1 + 0i. */
+  if ((beta_r != ONE) || (beta_i != ZERO)) {
+    SCAL_K(n, 0, 0, beta_r, beta_i, y, abs(incy), NULL, 0, NULL, 0);
+  }
+
+  /* alpha == 0: the scaled y is already the result. */
+  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative stride, shift the base pointer forward by
+     (n - 1) * |inc| * COMPSIZE elements before calling the kernel. */
+  if (incx < 0) x -= (n - 1) * incx * COMPSIZE;
+  if (incy < 0) y -= (n - 1) * incy * COMPSIZE;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads != 1) {
+
+    (hbmv_thread[uplo])(n, k, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);
+
+  } else
+#endif
+  {
+    (hbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
+  }
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(4, n * k / 2 + n, n * k);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/zhemv.c b/interface/zhemv.c
new file mode 100644
index 000000000..3cba445c2
--- /dev/null
+++ b/interface/zhemv.c
@@ -0,0 +1,215 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include
+#include
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name reported to xerbla; chosen by the precision macros. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XHEMV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHEMV "
+#else
+#define ERROR_NAME "CHEMV "
+#endif
+
+/*
+ * HEMV front end.  Exactly one of two prologues is compiled: the
+ * Fortran-style NAME (arguments by reference) or, when CBLAS is defined,
+ * CNAME (arguments by value plus a storage order).  Each prologue builds
+ * the kernel tables as locals, validates its arguments and decodes
+ * `uplo` into a table index; both then fall into the shared body after
+ * the #endif.
+ *
+ * On a bad argument xerbla is called with the 1-based position and the
+ * routine returns without touching y.
+ */
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
+         FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  blasint lda = *LDA;
+  blasint incx = *INCX;
+  FLOAT beta_r = BETA[0];
+  FLOAT beta_i = BETA[1];
+  blasint incy = *INCY;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  /* Single-threaded kernels, indexed by the decoded uplo value 0..3. */
+  int (*hemv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
+    HEMV_U, HEMV_L, HEMV_V, HEMV_M,
+  };
+
+#ifdef SMP
+  /* Threaded kernels, same indexing; these take alpha by pointer. */
+  int (*hemv_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+    HEMV_THREAD_U, HEMV_THREAD_L, HEMV_THREAD_V, HEMV_THREAD_M,
+  };
+#endif
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  /* 'V' and 'M' select kernel slots 2 and 3 (the same slots the CBLAS
+     row-major path uses). */
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+  if (uplo_arg == 'V') uplo = 2;
+  if (uplo_arg == 'M') uplo = 3;
+
+  info = 0;
+
+  /* Checked from last argument to first so the lowest position wins. */
+  if (incy == 0)          info = 10;
+  if (incx == 0)          info =  7;
+  if (lda < MAX(1, n))    info =  5;
+  if (n < 0)              info =  2;
+  if (uplo < 0)           info =  1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA,
+           FLOAT *a, blasint lda, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *y, blasint incy) {
+
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  FLOAT beta_r = BETA[0];
+  FLOAT beta_i = BETA[1];
+
+  FLOAT *buffer;
+  int uplo;   /* the former `trans` local was assigned but never read */
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  /* Single-threaded kernels, indexed by the decoded uplo value 0..3. */
+  int (*hemv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
+    HEMV_U, HEMV_L, HEMV_V, HEMV_M,
+  };
+
+#ifdef SMP
+  /* Threaded kernels, same indexing; these take alpha by pointer. */
+  int (*hemv_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+    HEMV_THREAD_U, HEMV_THREAD_L, HEMV_THREAD_V, HEMV_THREAD_M,
+  };
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  /* info stays 0 (reported as an error below) when `order` is neither
+     column- nor row-major; -1 means "all arguments valid". */
+  uplo  = -1;
+  info  =  0;
+
+  if (order == CblasColMajor) {
+
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (incy == 0)          info = 10;
+    if (incx == 0)          info =  7;
+    if (lda < MAX(1, n))    info =  5;
+    if (n < 0)              info =  2;
+    if (uplo < 0)           info =  1;
+  }
+
+  if (order == CblasRowMajor) {
+
+    /* Row-major storage maps Upper/Lower onto kernel slots 3 and 2. */
+    if (Uplo == CblasUpper) uplo = 3;
+    if (Uplo == CblasLower) uplo = 2;
+
+    info = -1;
+
+    if (incy == 0)          info = 10;
+    if (incx == 0)          info =  7;
+    if (lda < MAX(1, n))    info =  5;
+    if (n < 0)              info =  2;
+    if (uplo < 0)           info =  1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* ---- body shared by both entry points ---- */
+
+  if (n == 0) return;
+
+  /* Pre-scale y by beta through SCAL_K unless beta is exactly 1 + 0i. */
+  if ((beta_r != ONE) || (beta_i != ZERO)) SCAL_K(n, 0, 0, beta_r, beta_i, y, abs(incy), NULL, 0, NULL, 0);
+
+  /* alpha == 0: the scaled y is already the result. */
+  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative stride, shift the base pointer forward by
+     (n - 1) * |inc| * 2 elements before calling the kernel. */
+  if (incx < 0 ) x -= (n - 1) * incx * 2;
+  if (incy < 0 ) y -= (n - 1) * incy * 2;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+    (hemv[uplo])(n, n, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
+
+#ifdef SMP
+  } else {
+
+    (hemv_thread[uplo])(n, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(4, n * n / 2 + n, 2 * n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/zher.c b/interface/zher.c
new file mode 100644
index 000000000..ad982dd68
--- /dev/null
+++ b/interface/zher.c
@@ -0,0 +1,200 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include
+#include
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name reported to xerbla; chosen by the precision macros. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XHER "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHER "
+#else
+#define ERROR_NAME "CHER "
+#endif
+
+/* Single-threaded kernels, indexed by the decoded uplo value 0..3. */
+static int (*her[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
+#ifdef XDOUBLE
+  xher_U, xher_L, xher_V, xher_M,
+#elif defined(DOUBLE)
+  zher_U, zher_L, zher_V, zher_M,
+#else
+  cher_U, cher_L, cher_V, cher_M,
+#endif
+};
+
+#ifdef SMP
+/* Threaded kernels, same indexing, with a trailing thread count. */
+static int (*her_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  xher_thread_U, xher_thread_L, xher_thread_V, xher_thread_M,
+#elif defined(DOUBLE)
+  zher_thread_U, zher_thread_L, zher_thread_V, zher_thread_M,
+#else
+  cher_thread_U, cher_thread_L, cher_thread_V, cher_thread_M,
+#endif
+};
+#endif
+
+/*
+ * HER front end.  Exactly one of two prologues is compiled: the
+ * Fortran-style NAME (arguments by reference) or, when CBLAS is defined,
+ * CNAME (arguments by value plus a storage order).  Each prologue
+ * validates its arguments and decodes `uplo` into a kernel index; both
+ * then fall into the shared body after the #endif.  alpha is a single
+ * FLOAT here (not a real/imaginary pair).
+ *
+ * On a bad argument xerbla is called with the 1-based position and the
+ * routine returns without touching a.
+ */
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
+         FLOAT *x, blasint *INCX, FLOAT *a, blasint *LDA){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  FLOAT alpha = *ALPHA;
+  blasint lda = *LDA;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  /* Checked from last argument to first so the lowest position wins. */
+  if (lda < MAX(1, n))    info = 7;
+  if (incx == 0)          info = 5;
+  if (n < 0)              info = 2;
+  if (uplo < 0)           info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) {
+
+  FLOAT *buffer;
+  int uplo;   /* the former `trans` local was assigned but never read */
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  /* info stays 0 (reported as an error below) when `order` is neither
+     column- nor row-major; -1 means "all arguments valid". */
+  uplo  = -1;
+  info  =  0;
+
+  if (order == CblasColMajor) {
+
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (lda < MAX(1, n))    info = 7;
+    if (incx == 0)          info = 5;
+    if (n < 0)              info = 2;
+    if (uplo < 0)           info = 1;
+
+  }
+
+  if (order == CblasRowMajor) {
+
+    /* Row-major storage maps Upper/Lower onto kernel slots 3 and 2. */
+    if (Uplo == CblasUpper) uplo = 3;
+    if (Uplo == CblasLower) uplo = 2;
+
+    info = -1;
+
+    if (lda < MAX(1, n))    info = 7;
+    if (incx == 0)          info = 5;
+    if (n < 0)              info = 2;
+    if (uplo < 0)           info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* ---- body shared by both entry points ---- */
+
+  if (n == 0) return;
+
+  /* alpha == 0: the kernel is not called and a is left unchanged. */
+  if (alpha == ZERO) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative stride, shift the base pointer forward by
+     (n - 1) * |incx| * 2 elements before calling the kernel. */
+  if (incx < 0 ) x -= (n - 1) * incx * 2;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+    (her[uplo])(n, alpha, x, incx, a, lda, buffer);
+
+#ifdef SMP
+  } else {
+
+    (her_thread[uplo])(n, alpha, x, incx, a, lda, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/zher2.c b/interface/zher2.c
new file mode 100644
index 000000000..88fececf7
--- /dev/null
+++
b/interface/zher2.c @@ -0,0 +1,207 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include
+#include
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name reported to xerbla; chosen by the precision macros. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XHER2 "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHER2 "
+#else
+#define ERROR_NAME "CHER2 "
+#endif
+
+/* Single-threaded kernels, indexed by the decoded uplo value 0..3. */
+static int (*her2[])(BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
+#ifdef XDOUBLE
+  xher2_U, xher2_L, xher2_V, xher2_M,
+#elif defined(DOUBLE)
+  zher2_U, zher2_L, zher2_V, zher2_M,
+#else
+  cher2_U, cher2_L, cher2_V, cher2_M,
+#endif
+};
+
+#ifdef SMP
+/* Threaded kernels, same indexing; these take alpha by pointer. */
+static int (*her2_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  xher2_thread_U, xher2_thread_L, xher2_thread_V, xher2_thread_M,
+#elif defined(DOUBLE)
+  zher2_thread_U, zher2_thread_L, zher2_thread_V, zher2_thread_M,
+#else
+  cher2_thread_U, cher2_thread_L, cher2_thread_V, cher2_thread_M,
+#endif
+};
+#endif
+
+/*
+ * HER2 front end.  Exactly one of two prologues is compiled: the
+ * Fortran-style NAME (arguments by reference) or, when CBLAS is defined,
+ * CNAME (arguments by value plus a storage order).  Each prologue
+ * validates its arguments and decodes `uplo` into a kernel index; both
+ * then fall into the shared body after the #endif.
+ *
+ * On a bad argument xerbla is called with the 1-based position and the
+ * routine returns without touching a.
+ */
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
+         FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a, blasint *LDA){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  blasint lda = *LDA;
+  blasint incx = *INCX;
+  blasint incy = *INCY;
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  /* Checked from last argument to first so the lowest position wins. */
+  if (lda < MAX(1, n))    info = 9;
+  if (incy == 0)          info = 7;
+  if (incx == 0)          info = 5;
+  if (n < 0)              info = 2;
+  if (uplo < 0)           info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT *a, blasint lda) {
+
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  FLOAT *buffer;
+  int uplo;   /* the former `trans` local was assigned but never read */
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  /* info stays 0 (reported as an error below) when `order` is neither
+     column- nor row-major; -1 means "all arguments valid". */
+  uplo  = -1;
+  info  =  0;
+
+  if (order == CblasColMajor) {
+
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (lda < MAX(1, n))    info = 9;
+    if (incy == 0)          info = 7;
+    if (incx == 0)          info = 5;
+    if (n < 0)              info = 2;
+    if (uplo < 0)           info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+
+    /* Row-major storage maps Upper/Lower onto kernel slots 3 and 2. */
+    if (Uplo == CblasUpper) uplo = 3;
+    if (Uplo == CblasLower) uplo = 2;
+
+    info = -1;
+
+    /* NOTE(review): incx/incy report positions 7/5 here, the reverse of
+       the column-major branch.  Presumably this mirrors a row-major
+       convention in which x and y trade places; confirm before changing. */
+    if (lda < MAX(1, n))    info = 9;
+    if (incx == 0)          info = 7;
+    if (incy == 0)          info = 5;
+    if (n < 0)              info = 2;
+    if (uplo < 0)           info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* ---- body shared by both entry points ---- */
+
+  if (n == 0) return;
+
+  /* alpha == 0: the kernel is not called and a is left unchanged. */
+  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative stride, shift the base pointer forward by
+     (n - 1) * |inc| * 2 elements before calling the kernel. */
+  if (incx < 0 ) x -= (n - 1) * incx * 2;
+  if (incy < 0 ) y -= (n - 1) * incy * 2;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+    (her2[uplo])(n, alpha_r, alpha_i, x, incx, y, incy, a, lda, buffer);
+
+#ifdef SMP
+  } else {
+
+    (her2_thread[uplo])(n, ALPHA, x, incx, y, incy, a, lda, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/zhpmv.c b/interface/zhpmv.c
new file mode 100644
index 000000000..d7013e668
--- /dev/null
+++ b/interface/zhpmv.c
@@ -0,0 +1,213 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1.
Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include
+#include
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name reported to xerbla; chosen by the precision macros. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XHPMV "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHPMV "
+#else
+#define ERROR_NAME "CHPMV "
+#endif
+
+/* Single-threaded kernels, indexed by the decoded uplo value 0..3. */
+static int (*hpmv[])(BLASLONG, FLOAT, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = {
+#ifdef XDOUBLE
+  xhpmv_U, xhpmv_L, xhpmv_V, xhpmv_M,
+#elif defined(DOUBLE)
+  zhpmv_U, zhpmv_L, zhpmv_V, zhpmv_M,
+#else
+  chpmv_U, chpmv_L, chpmv_V, chpmv_M,
+#endif
+};
+
+#ifdef SMP
+/* Threaded kernels, same indexing; these take alpha by pointer. */
+static int (*hpmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
+#ifdef XDOUBLE
+  xhpmv_thread_U, xhpmv_thread_L, xhpmv_thread_V, xhpmv_thread_M,
+#elif defined(DOUBLE)
+  zhpmv_thread_U, zhpmv_thread_L, zhpmv_thread_V, zhpmv_thread_M,
+#else
+  chpmv_thread_U, chpmv_thread_L, chpmv_thread_V, chpmv_thread_M,
+#endif
+};
+#endif
+
+/*
+ * HPMV front end.  Exactly one of two prologues is compiled: the
+ * Fortran-style NAME (arguments by reference) or, when CBLAS is defined,
+ * CNAME (arguments by value plus a storage order).  Each prologue
+ * validates its arguments and decodes `uplo` into a kernel index; both
+ * then fall into the shared body after the #endif.  The matrix argument
+ * `a` carries no leading dimension.
+ *
+ * On a bad argument xerbla is called with the 1-based position and the
+ * routine returns without touching y.
+ */
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
+         FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY){
+
+  char    uplo_arg = *UPLO;
+  blasint n        = *N;
+  blasint incx     = *INCX;
+  blasint incy     = *INCY;
+  FLOAT   alpha_r  = ALPHA[0];
+  FLOAT   alpha_i  = ALPHA[1];
+  FLOAT   beta_r   = BETA[0];
+  FLOAT   beta_i   = BETA[1];
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  TOUPPER(uplo_arg);
+
+  switch (uplo_arg) {
+  case 'U': uplo =  0; break;
+  case 'L': uplo =  1; break;
+  default:  uplo = -1; break;
+  }
+
+  /* The lowest-numbered offending argument is the one reported. */
+  if      (uplo < 0)    info = 1;
+  else if (n < 0)       info = 2;
+  else if (incx == 0)   info = 6;
+  else if (incy == 0)   info = 9;
+  else                  info = 0;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_UPLO Uplo,
+           blasint n,
+           FLOAT *ALPHA,
+           FLOAT *a,
+           FLOAT *x, blasint incx,
+           FLOAT *BETA,
+           FLOAT *y, blasint incy){
+
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  FLOAT beta_r  = BETA[0];
+  FLOAT beta_i  = BETA[1];
+  FLOAT *buffer;
+  int uplo;
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  /* info stays 0 (reported as an error below) when `order` is neither
+     column- nor row-major; -1 means "all arguments valid". */
+  uplo = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    uplo = (Uplo == CblasLower) ? 1 : ((Uplo == CblasUpper) ? 0 : -1);
+
+    if      (uplo < 0)    info =  1;
+    else if (n < 0)       info =  2;
+    else if (incx == 0)   info =  6;
+    else if (incy == 0)   info =  9;
+    else                  info = -1;
+  }
+
+  if (order == CblasRowMajor) {
+    /* Row-major storage maps Upper/Lower onto kernel slots 3 and 2. */
+    uplo = (Uplo == CblasLower) ? 2 : ((Uplo == CblasUpper) ? 3 : -1);
+
+    if      (uplo < 0)    info =  1;
+    else if (n < 0)       info =  2;
+    else if (incx == 0)   info =  6;
+    else if (incy == 0)   info =  9;
+    else                  info = -1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* ---- body shared by both entry points ---- */
+
+  if (n == 0) return;
+
+  /* Pre-scale y by beta through SCAL_K unless beta is exactly 1 + 0i. */
+  if ((beta_r != ONE) || (beta_i != ZERO)) {
+    SCAL_K(n, 0, 0, beta_r, beta_i, y, abs(incy), NULL, 0, NULL, 0);
+  }
+
+  /* alpha == 0: the scaled y is already the result. */
+  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative stride, shift the base pointer forward by
+     (n - 1) * |inc| * 2 elements before calling the kernel. */
+  if (incx < 0) x -= (n - 1) * incx * 2;
+  if (incy < 0) y -= (n - 1) * incy * 2;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads != 1) {
+
+    (hpmv_thread[uplo])(n, ALPHA, a, x, incx, y, incy, buffer, nthreads);
+
+  } else
+#endif
+  {
+    (hpmv[uplo])(n, alpha_r, alpha_i, a, x, incx, y, incy, buffer);
+  }
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/zhpr.c b/interface/zhpr.c
new file mode 100644
index 000000000..c48e35238
--- /dev/null
+++ b/interface/zhpr.c
@@ -0,0 +1,198 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1.
Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name handed to xerbla when an argument check fails.
+   NOTE(review): these literals are 5 characters long, while the other
+   level-2 routine names in this patch are 6; a padding space may have
+   been lost when the patch text was extracted - confirm upstream. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XHPR "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHPR "
+#else
+#define ERROR_NAME "CHPR "
+#endif
+
+/* Single-threaded kernels, indexed by the `uplo` code computed in the
+   interface routine below:
+     0 (_U) upper, 1 (_L) lower  - Fortran interface and CBLAS column-major
+     2 (_V) lower, 3 (_M) upper  - CBLAS row-major
+   alpha is a single FLOAT; the last argument is the scratch buffer. */
+static int (*hpr[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, FLOAT *) = {
+#ifdef XDOUBLE
+  xhpr_U, xhpr_L, xhpr_V, xhpr_M,
+#elif defined(DOUBLE)
+  zhpr_U, zhpr_L, zhpr_V, zhpr_M,
+#else
+  chpr_U, chpr_L, chpr_V, chpr_M,
+#endif
+};
+
+#ifdef SMP
+/* Threaded kernels, same indexing as hpr[]; the trailing int is the
+   thread count. */
+static int (*hpr_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, FLOAT *, int) = {
+#ifdef XDOUBLE
+  xhpr_thread_U, xhpr_thread_L, xhpr_thread_V, xhpr_thread_M,
+#elif defined(DOUBLE)
+  zhpr_thread_U, zhpr_thread_L, zhpr_thread_V, zhpr_thread_M,
+#else
+  chpr_thread_U, chpr_thread_L, chpr_thread_V, chpr_thread_M,
+#endif
+};
+#endif
+
+/* Interface routine for ?HPR (in the BLAS specification: the Hermitian
+   rank-1 update A := alpha*x*x**H + A with A in packed storage and
+   alpha real).
+
+   Two entry points share one body:
+     - NAME  (Fortran calling convention, every argument by reference),
+     - CNAME (CBLAS convention, built when CBLAS is defined).
+
+   Each prologue validates its arguments and reports the first failing
+   one through xerbla, then returns without touching a. Checks are
+   written last-to-first so that the lowest parameter number wins. */
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
+          FLOAT *x, blasint *INCX, FLOAT *a){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  FLOAT alpha = *ALPHA;
+  blasint incx = *INCX;
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  /* Accept 'u'/'l' as well as 'U'/'L'; anything else leaves uplo at -1. */
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  if (incx == 0) info = 5;
+  if (n < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_UPLO Uplo,
+           blasint n,
+           FLOAT alpha,
+           FLOAT *x, blasint incx,
+           FLOAT *a) {
+
+  FLOAT *buffer;
+  int uplo;
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  /* info starts at 0 and is only reset to -1 ("no error") once a known
+     storage order has been seen, so an unrecognised `order` is reported
+     to xerbla as parameter 0. */
+  uplo = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (incx == 0) info = 5;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    /* Row-major selects the _M (upper) and _V (lower) kernel variants. */
+    if (Uplo == CblasUpper) uplo = 3;
+    if (Uplo == CblasLower) uplo = 2;
+
+    info = -1;
+
+    if (incx == 0) info = 5;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* Nothing to update for an empty problem or a zero scale factor. */
+  if (n == 0) return;
+
+  if (alpha == ZERO) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative increment, shift the base pointer by (n - 1) * |inc|
+     elements; the factor 2 is the number of FLOATs per element
+     (presumably real and imaginary parts). */
+  if (incx < 0 ) x -= (n - 1) * incx * 2;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+  (hpr[uplo])(n, alpha, x, incx, a, buffer);
+
+#ifdef SMP
+
+  } else {
+
+    (hpr_thread[uplo])(n, alpha, x, incx, a, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/zhpr2.c b/interface/zhpr2.c new file mode 100644 index 000000000..cf1d5f9fc --- /dev/null +++ b/interface/zhpr2.c @@ -0,0 +1,207 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name handed to xerbla when an argument check fails. The
+   variant is selected by the precision this file is compiled for. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XHPR2 "
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZHPR2 "
+#else
+#define ERROR_NAME "CHPR2 "
+#endif
+
+/* Single-threaded kernels, indexed by the `uplo` code computed in the
+   interface routine below:
+     0 (_U) upper, 1 (_L) lower  - Fortran interface and CBLAS column-major
+     2 (_V) lower, 3 (_M) upper  - CBLAS row-major
+   alpha is passed by value as two FLOATs; the last argument is the
+   scratch buffer. */
+static int (*hpr2[])(BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *) = {
+#ifdef XDOUBLE
+  xhpr2_U, xhpr2_L, xhpr2_V, xhpr2_M,
+#elif defined(DOUBLE)
+  zhpr2_U, zhpr2_L, zhpr2_V, zhpr2_M,
+#else
+  chpr2_U, chpr2_L, chpr2_V, chpr2_M,
+#endif
+};
+
+#ifdef SMP
+/* Threaded kernels, same indexing as hpr2[]. alpha is passed as a
+   pointer, and the trailing int is the thread count. */
+static int (*hpr2_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *, int) = {
+#ifdef XDOUBLE
+  xhpr2_thread_U, xhpr2_thread_L, xhpr2_thread_V, xhpr2_thread_M,
+#elif defined(DOUBLE)
+  zhpr2_thread_U, zhpr2_thread_L, zhpr2_thread_V, zhpr2_thread_M,
+#else
+  chpr2_thread_U, chpr2_thread_L, chpr2_thread_V, chpr2_thread_M,
+#endif
+};
+#endif
+
+/* Interface routine for ?HPR2 (in the BLAS specification: the Hermitian
+   rank-2 update A := alpha*x*y**H + conjg(alpha)*y*x**H + A with A in
+   packed storage).
+
+   Two entry points share one body:
+     - NAME  (Fortran calling convention, every argument by reference),
+     - CNAME (CBLAS convention, built when CBLAS is defined).
+
+   Each prologue validates its arguments and reports the first failing
+   one through xerbla, then returns without touching a. Checks are
+   written last-to-first so that the lowest parameter number wins.
+
+   ALPHA points to two FLOATs (read as [0] and [1]). */
+#ifndef CBLAS
+
+void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
+          FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a){
+
+  char uplo_arg = *UPLO;
+  blasint n = *N;
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  blasint incx = *INCX;
+  blasint incy = *INCY;
+
+  blasint info;
+  int uplo;
+  FLOAT *buffer;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  /* Accept 'u'/'l' as well as 'U'/'L'; anything else leaves uplo at -1. */
+  TOUPPER(uplo_arg);
+  uplo = -1;
+
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  info = 0;
+
+  if (incy == 0) info = 7;
+  if (incx == 0) info = 5;
+  if (n < 0) info = 2;
+  if (uplo < 0) info = 1;
+
+  if (info != 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+
+void CNAME(enum CBLAS_ORDER order,
+           enum CBLAS_UPLO Uplo,
+           blasint n,
+           FLOAT *ALPHA,
+           FLOAT *x, blasint incx,
+           FLOAT *y, blasint incy,
+           FLOAT *a) {
+
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  FLOAT *buffer;
+  int uplo;
+  blasint info;
+#ifdef SMP
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_CNAME;
+
+  /* info starts at 0 and is only reset to -1 ("no error") once a known
+     storage order has been seen, so an unrecognised `order` is reported
+     to xerbla as parameter 0. */
+  uplo = -1;
+  info = 0;
+
+  if (order == CblasColMajor) {
+    if (Uplo == CblasUpper) uplo = 0;
+    if (Uplo == CblasLower) uplo = 1;
+
+    info = -1;
+
+    if (incy == 0) info = 7;
+    if (incx == 0) info = 5;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    /* Row-major selects the _M (upper) and _V (lower) kernel variants. */
+    if (Uplo == CblasUpper) uplo = 3;
+    if (Uplo == CblasLower) uplo = 2;
+
+    info = -1;
+
+    /* NOTE(review): the parameter numbers for incx and incy are swapped
+       relative to the column-major branch above (incx -> 7, incy -> 5).
+       This may mirror a reference implementation that exchanges x and y
+       for row-major, or it may be a copy/paste slip - confirm before
+       changing. */
+    if (incx == 0) info = 7;
+    if (incy == 0) info = 5;
+    if (n < 0) info = 2;
+    if (uplo < 0) info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* Nothing to update for an empty problem or alpha == 0 + 0i. */
+  if (n == 0) return;
+
+  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;
+
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* For a negative increment, shift the base pointer by (n - 1) * |inc|
+     elements; the factor 2 is the number of FLOATs per element
+     (presumably real and imaginary parts). */
+  if (incx < 0 ) x -= (n - 1) * incx * 2;
+  if (incy < 0 ) y -= (n - 1) * incy * 2;
+
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+  (hpr2[uplo])(n, alpha_r, alpha_i, x, incx, y, incy, a, buffer);
+
+#ifdef SMP
+  } else {
+
+    (hpr2_thread[uplo])(n, ALPHA, x, incx, y, incy, a, buffer, nthreads);
+
+  }
+#endif
+
+  blas_memory_free(buffer);
+
+  FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n);
+
+  IDEBUG_END;
+
+  return;
+}
diff --git a/interface/zlaswp.c b/interface/zlaswp.c new file mode 100644 index 000000000..85ead2c86 --- /dev/null +++ b/interface/zlaswp.c @@ -0,0 +1,108 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. 
*/ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Row-interchange kernels indexed by the sign of the pivot increment:
+   [0] is used when incx >= 0 (_plus), [1] when incx < 0 (_minus).
+   The two FLOAT and the FLOAT* / BLASLONG placeholder arguments are
+   always passed as ZERO / NULL / 0 by this file. */
+static int (*laswp[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, blasint *, BLASLONG) = {
+#ifdef XDOUBLE
+  xlaswp_plus, xlaswp_minus,
+#elif defined(DOUBLE)
+  zlaswp_plus, zlaswp_minus,
+#else
+  claswp_plus, claswp_minus,
+#endif
+};
+
+/* Fortran-callable interface for ?LASWP (in LAPACK: apply the row
+   interchanges recorded in ipiv, for rows K1..K2, to the matrix a).
+
+   N     number of columns of a
+   a,LDA matrix and its leading dimension
+   K1,K2 first and last pivot positions to apply
+   ipiv  pivot indices
+   INCX  increment between pivot entries; its sign selects the kernel
+
+   Performs no argument validation beyond returning early when
+   incx == 0 or n <= 0. Always returns 0. */
+int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *ipiv, blasint *INCX){
+
+  blasint n = *N;
+  blasint lda = *LDA;
+  blasint k1 = *K1;
+  blasint k2 = *K2;
+  blasint incx = *INCX;
+  int flag;
+
+#ifdef SMP
+  int mode;
+  /* Placeholder alpha for blas_level1_thread; the kernel's two scalar
+     arguments are passed as ZERO in the single-threaded call as well. */
+  FLOAT dummyalpha[2] = {ZERO, ZERO};
+  int nthreads;
+#endif
+
+  PRINT_DEBUG_NAME;
+
+  if (incx == 0 || n <= 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  /* 0 selects the _plus kernel, 1 the _minus kernel. */
+  flag = (incx < 0);
+
+#ifdef SMP
+  nthreads = num_cpu_avail(2);
+
+  if (nthreads == 1) {
+#endif
+
+  (laswp[flag])(n, k1, k2, ZERO, ZERO, a, lda, NULL, 0, ipiv, incx);
+
+#ifdef SMP
+  } else {
+
+    /* Describe the element type to the generic level-1 thread driver. */
+#ifdef XDOUBLE
+    mode = BLAS_XDOUBLE | BLAS_COMPLEX;
+#elif defined(DOUBLE)
+    mode = BLAS_DOUBLE | BLAS_COMPLEX;
+#else
+    mode = BLAS_SINGLE | BLAS_COMPLEX;
+#endif
+
+    blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, laswp[flag], nthreads);
+  }
+#endif
+
+  FUNCTION_PROFILE_END(COMPSIZE, n * (k2 - k1), 0);
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/zlauu2.c b/interface/zlauu2.c new file mode 100644 index 000000000..05603fe1b --- /dev/null +++ b/interface/zlauu2.c @@ -0,0 +1,129 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. 
*/ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name handed to xerbla when an argument check fails. The
+   extended-precision complex variant uses the X prefix, matching the
+   xlauu2_* kernels selected below (the original literal here read
+   "QLAUU2"). */
+#ifdef XDOUBLE
+#define ERROR_NAME "XLAUU2"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZLAUU2"
+#else
+#define ERROR_NAME "CLAUU2"
+#endif
+
+/* Kernels indexed by uplo: [0] upper ('U'), [1] lower ('L'). */
+static blasint (*lauu2[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+
+#ifdef XDOUBLE
+  xlauu2_U, xlauu2_L,
+#elif defined(DOUBLE)
+  zlauu2_U, zlauu2_L,
+#else
+  clauu2_U, clauu2_L,
+#endif
+  };
+
+/* Fortran-callable interface for ?LAUU2 (in LAPACK: the unblocked
+   computation of U*U**H or L**H*L for a triangular factor).
+
+   UPLO  'U' or 'L' (case-insensitive)
+   N     order of the matrix
+   a,ldA matrix and its leading dimension
+   Info  out: 0 on success, -k if argument k was rejected, otherwise
+         whatever the kernel returns
+
+   The function's own return value is always 0; status is reported
+   through *Info only. */
+int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
+
+  blas_arg_t args;
+
+  blasint uplo_arg = *UPLO;
+  blasint uplo;
+  blasint info;
+  FLOAT *buffer;
+  /* On PPC440 builds the work areas are external objects and no buffer
+     is allocated here. */
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  args.n = *N;
+  args.a = (void *)a;
+  args.lda = *ldA;
+
+  TOUPPER(uplo_arg);
+
+  uplo = -1;
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  /* Checks run last-to-first so the lowest parameter number wins. */
+  info = 0;
+  if (args.lda < MAX(1,args.n)) info = 4;
+  if (args.n < 0) info = 2;
+  if (uplo < 0) info = 1;
+  if (info) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    *Info = - info;
+    return 0;
+  }
+
+  *Info = 0;
+
+  if (args.n <= 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  /* Carve the two work areas out of the single allocation: sa at a fixed
+     byte offset, sb after a GEMM_P x GEMM_Q panel rounded with
+     GEMM_ALIGN. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+  info = (lauu2[uplo])(&args, NULL, NULL, sa, sb, 0);
+
+  *Info = info;
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(1, .5 * args.n * args.n,
+                       2. * args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
+                       + 6. * 1./6. * args.n * (args.n * args.n - 1));
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/zlauum.c b/interface/zlauum.c new file mode 100644 index 000000000..23990e8e4 --- /dev/null +++ b/interface/zlauum.c @@ -0,0 +1,141 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. 
*/ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name handed to xerbla when an argument check fails. The
+   variant is selected by the precision this file is compiled for. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XLAUUM"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZLAUUM"
+#else
+#define ERROR_NAME "CLAUUM"
+#endif
+
+/* Single-threaded drivers indexed by uplo: [0] upper, [1] lower. */
+static blasint (*lauum_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+  LAUUM_U_SINGLE, LAUUM_L_SINGLE,
+};
+
+#ifdef SMP
+/* Multi-threaded drivers, same indexing as lauum_single[]. */
+static blasint (*lauum_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+  LAUUM_U_PARALLEL, LAUUM_L_PARALLEL,
+};
+#endif
+
+/* Fortran-callable interface for ?LAUUM (in LAPACK: the blocked
+   computation of U*U**H or L**H*L for a triangular factor).
+
+   UPLO  'U' or 'L' (case-insensitive)
+   N     order of the matrix
+   a,ldA matrix and its leading dimension
+   Info  out: 0 on success, -k if argument k was rejected, otherwise
+         whatever the driver returns
+
+   The function's own return value is always 0; status is reported
+   through *Info only. */
+int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
+
+  blas_arg_t args;
+
+  blasint uplo_arg = *UPLO;
+  blasint uplo;
+  blasint info;
+  FLOAT *buffer;
+  /* On PPC440 builds the work areas are external objects and no buffer
+     is allocated here. */
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  args.n = *N;
+  args.a = (void *)a;
+  args.lda = *ldA;
+
+  TOUPPER(uplo_arg);
+
+  uplo = -1;
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  /* Checks run last-to-first so the lowest parameter number wins. */
+  info = 0;
+  if (args.lda < MAX(1,args.n)) info = 4;
+  if (args.n < 0) info = 2;
+  if (uplo < 0) info = 1;
+  if (info) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    *Info = - info;
+    return 0;
+  }
+
+  *Info = 0;
+
+  if (args.n == 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  /* Carve the two work areas out of the single allocation: sa at a fixed
+     byte offset, sb after a GEMM_P x GEMM_Q panel rounded with
+     GEMM_ALIGN. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+#ifdef SMP
+  args.common = NULL;
+  args.nthreads = num_cpu_avail(4);
+
+  /* Use the single-threaded driver when only one CPU is available. */
+  if (args.nthreads == 1) {
+#endif
+
+    *Info = (lauum_single[uplo])(&args, NULL, NULL, sa, sb, 0);
+
+#ifdef SMP
+  } else {
+
+    *Info = (lauum_parallel[uplo])(&args, NULL, NULL, sa, sb, 0);
+
+  }
+#endif
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(1, .5 * args.n * args.n,
+                       2. * args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
+                       + args.n * (args.n * args.n - 1));
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/zpotf2.c b/interface/zpotf2.c new file mode 100644 index 000000000..f8f81e2c5 --- /dev/null +++ b/interface/zpotf2.c @@ -0,0 +1,129 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name handed to xerbla when an argument check fails. The
+   variant is selected by the precision this file is compiled for. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XPOTF2"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZPOTF2"
+#else
+#define ERROR_NAME "CPOTF2"
+#endif
+
+/* Kernels indexed by uplo: [0] upper ('U'), [1] lower ('L'). */
+static blasint (*potf2[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
+
+#ifdef XDOUBLE
+  xpotf2_U, xpotf2_L,
+#elif defined(DOUBLE)
+  zpotf2_U, zpotf2_L,
+#else
+  cpotf2_U, cpotf2_L,
+#endif
+  };
+
+/* Fortran-callable interface for ?POTF2 (in LAPACK: the unblocked
+   Cholesky factorization of a Hermitian positive definite matrix).
+
+   UPLO  'U' or 'L' (case-insensitive)
+   N     order of the matrix
+   a,ldA matrix and its leading dimension
+   Info  out: 0 on success, -k if argument k was rejected, otherwise
+         whatever the kernel returns
+
+   The function's own return value is always 0; status is reported
+   through *Info only. */
+int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
+
+  blas_arg_t args;
+
+  blasint uplo_arg = *UPLO;
+  blasint uplo;
+  blasint info;
+  FLOAT *buffer;
+  /* On PPC440 builds the work areas are external objects and no buffer
+     is allocated here. */
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  args.n = *N;
+  args.a = (void *)a;
+  args.lda = *ldA;
+
+  TOUPPER(uplo_arg);
+
+  uplo = -1;
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  /* Checks run last-to-first so the lowest parameter number wins. */
+  info = 0;
+  if (args.lda < MAX(1,args.n)) info = 4;
+  if (args.n < 0) info = 2;
+  if (uplo < 0) info = 1;
+  if (info) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    *Info = - info;
+    return 0;
+  }
+
+  *Info = 0;
+
+  if (args.n <= 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  /* Carve the two work areas out of the single allocation: sa at a fixed
+     byte offset, sb after a GEMM_P x GEMM_Q panel rounded with
+     GEMM_ALIGN. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+  info = (potf2[uplo])(&args, NULL, NULL, sa, sb, 0);
+
+  *Info = info;
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(1, .5 * args.n * args.n,
+                       2. * args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
+                       + 6. * 1./6. * args.n * (args.n * args.n - 1));
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/zpotrf.c b/interface/zpotrf.c new file mode 100644 index 000000000..e2004d744 --- /dev/null +++ b/interface/zpotrf.c @@ -0,0 +1,141 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/* Routine name handed to xerbla when an argument check fails. The
+   variant is selected by the precision this file is compiled for. */
+#ifdef XDOUBLE
+#define ERROR_NAME "XPOTRF"
+#elif defined(DOUBLE)
+#define ERROR_NAME "ZPOTRF"
+#else
+#define ERROR_NAME "CPOTRF"
+#endif
+
+/* Single-threaded drivers indexed by uplo: [0] upper, [1] lower. */
+static blasint (*potrf_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
+  POTRF_U_SINGLE, POTRF_L_SINGLE,
+};
+
+#ifdef SMP
+/* Multi-threaded drivers, same indexing as potrf_single[]. */
+static blasint (*potrf_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
+  POTRF_U_PARALLEL, POTRF_L_PARALLEL,
+};
+#endif
+
+/* Fortran-callable interface for ?POTRF (in LAPACK: the blocked
+   Cholesky factorization of a Hermitian positive definite matrix).
+
+   UPLO  'U' or 'L' (case-insensitive)
+   N     order of the matrix
+   a,ldA matrix and its leading dimension
+   Info  out: 0 on success, -k if argument k was rejected, otherwise
+         whatever the driver returns
+
+   The function's own return value is always 0; status is reported
+   through *Info only. */
+int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
+
+  blas_arg_t args;
+
+  blasint uplo_arg = *UPLO;
+  blasint uplo;
+  blasint info;
+  FLOAT *buffer;
+  /* On PPC440 builds the work areas are external objects and no buffer
+     is allocated here. */
+#ifdef PPC440
+  extern
+#endif
+  FLOAT *sa, *sb;
+
+  PRINT_DEBUG_NAME;
+
+  args.n = *N;
+  args.a = (void *)a;
+  args.lda = *ldA;
+
+  TOUPPER(uplo_arg);
+
+  uplo = -1;
+  if (uplo_arg == 'U') uplo = 0;
+  if (uplo_arg == 'L') uplo = 1;
+
+  /* Checks run last-to-first so the lowest parameter number wins. */
+  info = 0;
+  if (args.lda < MAX(1,args.n)) info = 4;
+  if (args.n < 0) info = 2;
+  if (uplo < 0) info = 1;
+  if (info) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    *Info = - info;
+    return 0;
+  }
+
+  *Info = 0;
+
+  if (args.n == 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+#ifndef PPC440
+  buffer = (FLOAT *)blas_memory_alloc(1);
+
+  /* Carve the two work areas out of the single allocation: sa at a fixed
+     byte offset, sb after a GEMM_P x GEMM_Q panel rounded with
+     GEMM_ALIGN. */
+  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
+  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
+#endif
+
+#ifdef SMP
+  args.common = NULL;
+  args.nthreads = num_cpu_avail(4);
+
+  /* Use the single-threaded driver when only one CPU is available. */
+  if (args.nthreads == 1) {
+#endif
+
+    *Info = (potrf_single[uplo])(&args, NULL, NULL, sa, sb, 0);
+
+#ifdef SMP
+  } else {
+
+    *Info = (potrf_parallel[uplo])(&args, NULL, NULL, sa, sb, 0);
+
+  }
+#endif
+
+#ifndef PPC440
+  blas_memory_free(buffer);
+#endif
+
+  FUNCTION_PROFILE_END(1, .5 * args.n * args.n,
+                       2. * args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
+                       + 6. * 1./6. * args.n * (args.n * args.n - 1));
+
+  IDEBUG_END;
+
+  return 0;
+}
diff --git a/interface/zpotri.c b/interface/zpotri.c new file mode 100644 index 000000000..df325424e --- /dev/null +++ b/interface/zpotri.c @@ -0,0 +1,157 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. 
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XPOTRI" +#elif defined(DOUBLE) +#define ERROR_NAME "ZPOTRI" +#else +#define ERROR_NAME "CPOTRI" +#endif + +static blasint (*trtri_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + TRTRI_UN_SINGLE, TRTRI_LN_SINGLE, +}; + +static blasint (*lauum_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + LAUUM_U_SINGLE, LAUUM_L_SINGLE, +}; + +#ifdef SMP +static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + TRTRI_UN_PARALLEL, TRTRI_LN_PARALLEL, +}; + +static blasint (*lauum_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + LAUUM_U_PARALLEL, LAUUM_L_PARALLEL, +}; +#endif + +int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ /* POTRI driver: validates UPLO/N/ldA, then runs TRTRI and, if that returns 0, LAUUM on a; the status is stored in *Info and the function itself always returns 0 */ + + blas_arg_t args; + + blasint uplo_arg = *UPLO; + blasint uplo; + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + + TOUPPER(uplo_arg); + + uplo = -1; /* stays -1 unless UPLO is 'U' or 'L' */ + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; /* later checks overwrite earlier ones, so the lowest-numbered bad argument is the one reported */ + if (args.lda < MAX(1,args.n)) info = 4; + if (args.n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + + if (args.n == 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + + info =
(trtri_single[uplo])(&args, NULL, NULL, sa, sb, 0); + + if (!info) { + info = (lauum_single[uplo])(&args, NULL, NULL, sa, sb, 0); + } + + *Info = info; + +#ifdef SMP + } else { + info = (trtri_parallel[uplo])(&args, NULL, NULL, sa, sb, 0); + + if (!info) { + info = (lauum_parallel[uplo])(&args, NULL, NULL, sa, sb, 0); + } + + *Info = info; + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.n * args.n, 2. / 3. * args.n * args.n * args.n); /* args.m is never assigned in this routine, so the counts are expressed with args.n only */ + + IDEBUG_END; + + return 0; +} diff --git a/interface/zrot.c b/interface/zrot.c new file mode 100644 index 000000000..f18bbc6d1 --- /dev/null +++ b/interface/zrot.c @@ -0,0 +1,72 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *C, FLOAT *S){ /* ROT interface: reads the scalar arguments and forwards x, y, c, s to ROT_K; returns immediately when n <= 0 */ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + BLASLONG incy = *INCY; + FLOAT c = *C; + FLOAT s = *S; + + PRINT_DEBUG_NAME; + + if (n <= 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * 2 * incx; /* negative stride: advance the base pointer by (n - 1) elements of 2 FLOATs each */ + if (incy < 0) y -= (n - 1) * 2 * incy; + + ROT_K(n, x, incx, y, incy, c, s); + + FUNCTION_PROFILE_END(4, n, n); + + IDEBUG_END; + + return; + +} diff --git a/interface/zrotg.c b/interface/zrotg.c new file mode 100644 index 000000000..e9e8a11df --- /dev/null +++ b/interface/zrotg.c @@ -0,0 +1,115 @@ +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ /* complex ROTG: from DA and DB (each a real/imaginary pair) computes C[0] and the pair S[0..1], and overwrites DA[0..1]; DB is only read */ + + PRINT_DEBUG_NAME; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) + + long double da_r = *(DA + 0); + long double da_i = *(DA
+ 1); + long double db_r = *(DB + 0); + long double db_i = *(DB + 1); + long double r; + + long double ada = fabs(da_r) + fabs(da_i); + + if (ada == ZERO) { /* DA is exactly zero: C = 0, S = 1, DA takes the value of DB */ + *C = ZERO; + *(S + 0) = ONE; + *(S + 1) = ZERO; + *(DA + 0) = db_r; + *(DA + 1) = db_i; + } else { + long double alpha_r, alpha_i; + + ada = sqrt(da_r * da_r + da_i * da_i); + + r = sqrt(da_r * da_r + da_i * da_i + db_r * db_r + db_i * db_i); + + alpha_r = da_r / ada; + alpha_i = da_i / ada; + + *(C + 0) = ada / r; + *(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r; + *(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r; + *(DA + 0) = alpha_r * r; + *(DA + 1) = alpha_i * r; + } +#else + FLOAT da_r = *(DA + 0); + FLOAT da_i = *(DA + 1); + FLOAT db_r = *(DB + 0); + FLOAT db_i = *(DB + 1); + FLOAT r; + + FLOAT ada = fabs(da_r) + fabs(da_i); + FLOAT adb; + + if (ada == ZERO) { + *C = ZERO; + *(S + 0) = ONE; + *(S + 1) = ZERO; + *(DA + 0) = db_r; + *(DA + 1) = db_i; + } else { + FLOAT scale; + FLOAT aa_r, aa_i, bb_r, bb_i; + FLOAT alpha_r, alpha_i; + + aa_r = fabs(da_r); + aa_i = fabs(da_i); + + if (aa_i > aa_r) { /* keep the larger magnitude in aa_r so the ratio below is at most 1 */ + aa_r = fabs(da_i); + aa_i = fabs(da_r); + } + + scale = (aa_i / aa_r); /* aa_r is non-zero here because ada != ZERO */ + ada = aa_r * sqrt(ONE + scale * scale); + + bb_r = fabs(db_r); + bb_i = fabs(db_i); + + if (bb_i > bb_r) { /* swap from the db_* inputs, mirroring the aa swap; the old code read bb_r after overwriting it, leaving both equal to bb_i */ + bb_r = fabs(db_i); + bb_i = fabs(db_r); + } + + scale = (bb_r == ZERO) ? ZERO : (bb_i / bb_r); /* DB == 0 would otherwise give 0/0 and turn every output into NaN */ + adb = bb_r * sqrt(ONE + scale * scale); + + scale = ada + adb; + + aa_r = da_r / scale; + aa_i = da_i / scale; + bb_r = db_r / scale; + bb_i = db_i / scale; + + r = scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); + + alpha_r = da_r / ada; + alpha_i = da_i / ada; + + *(C + 0) = ada / r; + *(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r; + *(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r; + *(DA + 0) = alpha_r * r; + *(DA + 1) = alpha_i * r; + } +#endif + + FUNCTION_PROFILE_END(4, 4, 4); + + IDEBUG_END; + + return; +} diff --git a/interface/zsbmv.c b/interface/zsbmv.c new file mode 100644 index 000000000..71c03a660 --- /dev/null +++
b/interface/zsbmv.c @@ -0,0 +1,157 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XSBMV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSBMV " +#else +#define ERROR_NAME "CSBMV " +#endif + +static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { /* single-threaded kernels, indexed by uplo (0 = 'U', 1 = 'L') */ +#ifdef XDOUBLE + xsbmv_U, xsbmv_L, +#elif defined(DOUBLE) + zsbmv_U, zsbmv_L, +#else + csbmv_U, csbmv_L, +#endif +}; + +#ifdef SMP +static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { /* threaded kernels, same indexing; alpha is passed by pointer */ +#ifdef XDOUBLE + xsbmv_thread_U, xsbmv_thread_L, +#elif defined(DOUBLE) + zsbmv_thread_U, zsbmv_thread_L, +#else + csbmv_thread_U, csbmv_thread_L, +#endif +}; +#endif + +void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *b, blasint *INCX, FLOAT *BETA, FLOAT *c, blasint *INCY){ /* SBMV interface: validates the arguments, scales c by BETA with SCAL_K, then calls the U/L kernel selected by UPLO */ + + char uplo_arg = *UPLO; + blasint n = *N; + blasint k = *K; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + blasint lda = *LDA; + blasint incx = *INCX; + FLOAT beta_r = BETA[0]; + FLOAT beta_i = BETA[1]; + blasint incy = *INCY; + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; /* checks run from the last argument to the first, so the lowest-numbered bad argument wins */ + + if (incy == 0) info = 11; + if (incx == 0) info = 8; + if (lda < k + 1) info = 6; + if (k < 0) info = 3; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + if (n == 0) return; + + if ((beta_r != ONE) || (beta_i != ZERO)) SCAL_K(n, 0, 0, beta_r, beta_i, c, abs(incy), NULL, 0, NULL, 0); /* skipped when BETA == 1 */ + + if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; /* with ALPHA == 0 only the BETA scaling above is applied */ + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx
< 0 ) b -= (n - 1) * incx * COMPSIZE; /* negative stride: advance the base pointer by (n - 1) elements of COMPSIZE FLOATs */ + if (incy < 0 ) c -= (n - 1) * incy * COMPSIZE; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); + +#ifdef SMP + } else { + + (sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * k / 2 + n, n * k); + + IDEBUG_END; + + return; +} diff --git a/interface/zscal.c b/interface/zscal.c new file mode 100644 index 000000000..ad99874dc --- /dev/null +++ b/interface/zscal.c @@ -0,0 +1,117 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS /* pointer-argument entry NAME; the #else branch declares CNAME, which takes n and incx by value */ + +void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX){ + + blasint n = *N; + blasint incx = *INCX; + +#ifndef SSCAL + FLOAT *alpha=ALPHA; +#else + FLOAT alpha[2] = {ALPHA[0], ZERO}; /* SSCAL build: only ALPHA[0] is read, the imaginary part is ZERO */ +#endif + +#else + +#ifndef SSCAL +void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx){ + + FLOAT *alpha=ALPHA; +#else +void CNAME(blasint n, FLOAT alpha_r, FLOAT *x, blasint incx){ + + FLOAT alpha[2] = {alpha_r, ZERO}; +#endif +#endif + +#ifdef SMP + int mode; + int nthreads; +#endif + +#ifndef CBLAS + PRINT_DEBUG_NAME; +#else + PRINT_DEBUG_CNAME; +#endif + + if (incx <= 0 || n <= 0) return; /* non-positive stride or length: x is left untouched */ + + if ((alpha[0] == ONE) && (alpha[1] == ZERO)) return; /* scaling by 1 changes nothing */ + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifdef SMP + nthreads = num_cpu_avail(1); + + if (nthreads == 1) { +#endif + + SCAL_K(n, 0, 0, alpha[0], alpha[1], x, incx, NULL, 0, NULL, 0); + +#ifdef SMP + } else { +#ifdef XDOUBLE /* XDOUBLE case added so the threaded path gets the same three-way mode selection as zswap.c */ + mode = BLAS_XDOUBLE | BLAS_COMPLEX; +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | BLAS_COMPLEX; +#else + mode = BLAS_SINGLE |
BLAS_COMPLEX; +#endif + + blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0, (void *)SCAL_K, nthreads); + + } +#endif + + FUNCTION_PROFILE_END(4, n, n); + + IDEBUG_END; + + return; + +} diff --git a/interface/zspmv.c b/interface/zspmv.c new file mode 100644 index 000000000..ecf1af586 --- /dev/null +++ b/interface/zspmv.c @@ -0,0 +1,154 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE.
*/ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE /* the XDOUBLE name now carries the X prefix like the kernels below; it previously duplicated "ZSPMV " */ +#define ERROR_NAME "XSPMV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSPMV " +#else +#define ERROR_NAME "CSPMV " +#endif + +static int (*spmv[])(BLASLONG, FLOAT, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { /* single-threaded kernels, indexed by uplo (0 = 'U', 1 = 'L') */ +#ifdef XDOUBLE + xspmv_U, xspmv_L, +#elif defined(DOUBLE) + zspmv_U, zspmv_L, +#else + cspmv_U, cspmv_L, +#endif +}; + +#ifdef SMP +static int (*spmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { /* threaded kernels, same indexing; alpha is passed by pointer */ +#ifdef XDOUBLE + xspmv_thread_U, xspmv_thread_L, +#elif defined(DOUBLE) + zspmv_thread_U, zspmv_thread_L, +#else + cspmv_thread_U, cspmv_thread_L, +#endif +}; +#endif + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a, + FLOAT *b, blasint *INCX, FLOAT *BETA, FLOAT *c, blasint *INCY){ /* SPMV interface: validates the arguments, scales c by BETA with SCAL_K, then calls the U/L kernel selected by UPLO */ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + blasint incx = *INCX; + FLOAT beta_r = BETA[0]; + FLOAT beta_i = BETA[1]; + blasint incy = *INCY; + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; /* checks run from the last argument to the first, so the lowest-numbered bad argument wins */ + + if (incy == 0) info = 9; + if (incx == 0) info = 6; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + if (n == 0) return; + + if ((beta_r != ONE) || (beta_i != ZERO)) SCAL_K(n, 0, 0, beta_r, beta_i, c, abs(incy), NULL, 0, NULL, 0); + + if
((alpha_r == ZERO) && (alpha_i == ZERO)) return; /* with ALPHA == 0 only the BETA scaling above is applied */ + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) b -= (n - 1) * incx * COMPSIZE; /* negative stride: advance the base pointer by (n - 1) elements of COMPSIZE FLOATs */ + if (incy < 0 ) c -= (n - 1) * incy * COMPSIZE; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (spmv[uplo])(n, alpha_r, alpha_i, a, b, incx, c, incy, buffer); + +#ifdef SMP + + } else { + + (spmv_thread[uplo])(n, ALPHA, a, b, incx, c, incy, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/zspr.c b/interface/zspr.c new file mode 100644 index 000000000..0021bcda4 --- /dev/null +++ b/interface/zspr.c @@ -0,0 +1,146 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XSPR " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSPR " +#else +#define ERROR_NAME "CSPR " +#endif + +static int (*spr[])(BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, FLOAT *) = { /* single-threaded kernels, indexed by uplo (0 = 'U', 1 = 'L') */ +#ifdef XDOUBLE + xspr_U, xspr_L, +#elif defined(DOUBLE) + zspr_U, zspr_L, +#else + cspr_U, cspr_L, +#endif +}; + +#ifdef SMP +static int (*spr_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, FLOAT *, int) = { /* threaded kernels, same indexing; alpha is passed by pointer */ +#ifdef XDOUBLE + xspr_thread_U, xspr_thread_L, +#elif defined(DOUBLE) + zspr_thread_U, zspr_thread_L, +#else + cspr_thread_U, cspr_thread_L, +#endif +}; +#endif + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, + FLOAT *x, blasint *INCX, FLOAT *a){ /* SPR interface: validates the arguments, then passes x and a to the U/L kernel selected by UPLO */ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + blasint incx = *INCX; + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if
(uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; /* checks run from the last argument to the first, so the lowest-numbered bad argument wins */ + + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + if (n == 0) return; + + if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; /* ALPHA == 0: a is left unchanged */ + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * COMPSIZE; /* each element is COMPSIZE FLOATs (ALPHA is read as a real/imaginary pair); the factor was missing, so a negative incx started from the wrong element */ + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (spr[uplo])(n, alpha_r, alpha_i, x, incx, a, buffer); + +#ifdef SMP + } else { + + (spr_thread[uplo])(n, ALPHA, x, incx, a, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/zspr2.c b/interface/zspr2.c new file mode 100644 index 000000000..b54e1651a --- /dev/null +++ b/interface/zspr2.c @@ -0,0 +1,149 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XSPR2 " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSPR2 " +#else +#define ERROR_NAME "CSPR2 " +#endif + +static int (*spr2[])(BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *) = { /* single-threaded kernels, indexed by uplo (0 = 'U', 1 = 'L') */ +#ifdef XDOUBLE + xspr2_U, xspr2_L, +#elif defined(DOUBLE) + zspr2_U, zspr2_L, +#else + cspr2_U, cspr2_L, +#endif +}; + +#ifdef SMP +static int (*spr2_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *, int) = { /* threaded kernels, same indexing; alpha is passed by pointer */ +#ifdef XDOUBLE + xspr2_thread_U, xspr2_thread_L, +#elif defined(DOUBLE) + zspr2_thread_U, zspr2_thread_L, +#else + cspr2_thread_U, cspr2_thread_L, +#endif +}; +#endif + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, + FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a){ /* SPR2 interface: validates the arguments, then passes x, y and a to the U/L kernel selected by UPLO */ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + blasint incx = *INCX; + blasint incy = *INCY; + + blasint info; + int uplo; + FLOAT
*buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; /* checks run from the last argument to the first, so the lowest-numbered bad argument wins */ + + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + if (n == 0) return; + + if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; /* ALPHA == 0: a is left unchanged */ + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * COMPSIZE; /* each element is COMPSIZE FLOATs (ALPHA is read as a real/imaginary pair); the factor was missing, so a negative stride started from the wrong element */ + if (incy < 0 ) y -= (n - 1) * incy * COMPSIZE; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (spr2[uplo])(n, alpha_r, alpha_i, x, incx, y, incy, a, buffer); + +#ifdef SMP + } else { + + (spr2_thread[uplo])(n, ALPHA, x, incx, y, incy, a, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/zswap.c b/interface/zswap.c new file mode 100644 index 000000000..f4a03a550 --- /dev/null +++ b/interface/zswap.c @@ -0,0 +1,111 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution.
*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS /* pointer-argument entry NAME; the #else branch declares CNAME, which takes n, incx and incy by value */ + +void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ + + blasint n = *N; + blasint incx = *INCX; + blasint incy = *INCY; + +#else + +void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ + +#endif + +#ifdef SMP + int mode; + FLOAT dummyalpha[2] = {ZERO, ZERO}; /* placeholder alpha handed to blas_level1_thread below */ + int nthreads; +#endif + +#ifndef CBLAS + PRINT_DEBUG_NAME; +#else + PRINT_DEBUG_CNAME; +#endif + + if (n <= 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx * 2; /* negative stride: advance the base pointer by (n - 1) elements of 2 FLOATs each */ + if (incy < 0) y -= (n - 1) * incy * 2; + +#ifdef SMP + nthreads = num_cpu_avail(1); + + if (nthreads == 1) { +#endif + + SWAP_K(n, 0, 0, ZERO, ZERO, x, incx, y, incy, NULL, 0); + +#ifdef SMP + } else { + +#ifdef XDOUBLE + mode = BLAS_XDOUBLE | BLAS_COMPLEX; +#elif defined(DOUBLE) + mode = BLAS_DOUBLE | BLAS_COMPLEX; +#else + mode = BLAS_SINGLE | BLAS_COMPLEX; +#endif + + blas_level1_thread(mode, n, 0, 0, dummyalpha, + x, incx, y, incy, NULL, 0, (void *)SWAP_K, nthreads); /* SWAP_K is passed as the routine to run, with nthreads threads requested */ + + } +#endif + + FUNCTION_PROFILE_END(2, 2 * n, 0); + + IDEBUG_END; + + return; + +} diff --git a/interface/zsymv.c b/interface/zsymv.c new file mode 100644 index 000000000..afb2c1734 --- /dev/null +++ b/interface/zsymv.c @@ -0,0 +1,143 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2.
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XSYMV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSYMV " +#else +#define ERROR_NAME "CSYMV " +#endif + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA, + FLOAT *b, blasint *INCX, FLOAT *BETA, FLOAT *c, blasint *INCY){ /* pointer-argument entry; ALPHA and BETA each hold a (real, imaginary) pair */ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + blasint lda = *LDA; + blasint incx = *INCX; + FLOAT beta_r = BETA[0]; + FLOAT beta_i = BETA[1]; + blasint incy = *INCY; + + int (*symv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { /* kernels indexed by uplo: 0 = 'U', 1 = 'L' */ + SYMV_U, SYMV_L, + }; + +#ifdef SMP + int (*symv_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { + SYMV_THREAD_U, SYMV_THREAD_L, + }; +#endif + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; /* stays -1 unless uplo_arg is 'U' or 'L' */ + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (incy == 0) info = 10; /* checks run from the last argument to the first, so the lowest failing index is the one reported */ + if (incx == 0) info = 7; + if (lda < MAX(1, n)) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + if (n == 0) return; + + if ((beta_r != ONE) || (beta_i != ZERO)) SCAL_K(n, 0, 0, beta_r, beta_i, c, abs(incy), NULL, 0, NULL, 0); /* scale c by beta unless beta is exactly 1 + 0i */ + + if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; /* alpha is zero: only the scaling above applies */ + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) b -= (n - 1) * incx * COMPSIZE; /* negative stride: shift the base pointer by (n - 1) strides of COMPSIZE FLOATs */ + if (incy < 0 ) c -= (n - 1) * incy * COMPSIZE; + + buffer = (FLOAT *)blas_memory_alloc(1); /* work buffer handed to the kernel; released below */ + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (symv[uplo])(n, n, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); + 
+#ifdef SMP + } else { /* more than one thread available: use the threaded kernel, which takes ALPHA by pointer */ + + (symv_thread[uplo])(n, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/zsyr.c b/interface/zsyr.c new file mode 100644 index 000000000..b6b5202ec --- /dev/null +++ b/interface/zsyr.c @@ -0,0 +1,203 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. 
*/ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XSYR " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSYR " +#else +#define ERROR_NAME "CSYR " +#endif + +static int (*syr[])(BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { /* kernels indexed by uplo: 0 = upper, 1 = lower */ +#ifdef XDOUBLE + xsyr_U, xsyr_L, +#elif defined(DOUBLE) + zsyr_U, zsyr_L, +#else + csyr_U, csyr_L, +#endif +}; + +#ifdef SMP +static int (*syr_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + xsyr_thread_U, xsyr_thread_L, +#elif defined(DOUBLE) + zsyr_thread_U, zsyr_thread_L, +#else + csyr_thread_U, csyr_thread_L, +#endif +}; +#endif + + +#ifndef CBLAS + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, + FLOAT *x, blasint *INCX, FLOAT *a, blasint *LDA){ /* pointer-argument entry; ALPHA holds a (real, imaginary) pair */ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + blasint lda = *LDA; + blasint incx = *INCX; + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (lda < MAX(1, n)) info = 7; /* checks run from the last argument to the first, so the lowest failing index is the one reported */ + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLOAT *x, int incx, FLOAT *a, int lda) { /* NOTE(review): this prologue never declares alpha_r, alpha_i or ALPHA, all of which the shared body below uses - confirm whether the CBLAS variant is ever built */ + + FLOAT *buffer; + int trans, uplo; + blasint info; +#ifdef SMP + int nthreads; +#endif + +
PRINT_DEBUG_CNAME; + + trans = -1; + uplo = -1; + info = 0; + + if (order == CblasColMajor) { + + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + info = -1; + + if (lda < MAX(1, n)) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + } + + if (order == CblasRowMajor) { /* row-major: the upper/lower selection is swapped relative to column-major */ + + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + info = -1; + + if (lda < MAX(1, n)) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { /* info stays 0 when order matched neither layout, so that case is reported too */ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; /* alpha is zero: the update leaves a unchanged */ + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * 2; /* negative stride: each complex element is 2 FLOATs, so the shift needs the factor 2 (it was missing) */ + + buffer = (FLOAT *)blas_memory_alloc(1); /* work buffer handed to the kernel; released below */ + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (syr[uplo])(n, alpha_r, alpha_i, x, incx, a, lda, buffer); + +#ifdef SMP + } else { /* more than one thread available: use the threaded kernel, which takes ALPHA by pointer */ + + (syr_thread[uplo])(n, ALPHA, x, incx, a, lda, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/zsyr2.c b/interface/zsyr2.c new file mode 100644 index 000000000..0c705cb12 --- /dev/null +++ b/interface/zsyr2.c @@ -0,0 +1,151 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XSYR2 " +#elif defined(DOUBLE) +#define ERROR_NAME "ZSYR2 " +#else +#define ERROR_NAME "CSYR2 " +#endif + +static int (*syr2[])(BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { /* kernels indexed by uplo: 0 = 'U', 1 = 'L' */ +#ifdef XDOUBLE + xsyr2_U, xsyr2_L, +#elif defined(DOUBLE) + zsyr2_U, zsyr2_L, +#else + csyr2_U, csyr2_L, +#endif +}; + +#ifdef SMP +static int (*syr2_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + xsyr2_thread_U, xsyr2_thread_L, +#elif defined(DOUBLE) + zsyr2_thread_U, zsyr2_thread_L, +#else + csyr2_thread_U, csyr2_thread_L, +#endif +}; +#endif + +void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, + FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a, blasint *LDA){ /* pointer-argument entry; ALPHA holds a (real, imaginary) pair */ + + char uplo_arg = *UPLO; + blasint n = *N; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; + blasint lda = *LDA; + blasint incx = *INCX; + blasint incy = *INCY; + + blasint info; + int uplo; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + uplo = -1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (lda < MAX(1, n)) info = 9; /* checks run from the last argument to the first, so the lowest failing index is the one reported */ + if (incy == 0) info = 7; + if (incx == 0) info = 5; + if (n < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + if (n == 0) return; + + if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; /* alpha is zero: the update leaves a unchanged */ + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * 2; /* negative stride: each complex element is 2 FLOATs, so the shift needs the factor 2 (it was missing) */ + if (incy < 0 ) y -= (n - 1) * incy * 2; + + buffer = (FLOAT *)blas_memory_alloc(1); /* work buffer handed to the kernel; released below */ + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (syr2[uplo])(n, alpha_r, alpha_i, x, incx, y, incy, 
a, lda, buffer); + +#ifdef SMP + } else { /* more than one thread available: use the threaded kernel, which takes ALPHA by pointer */ + + (syr2_thread[uplo])(n, ALPHA, x, incx, y, incy, a, lda, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/ztbmv.c b/interface/ztbmv.c new file mode 100644 index 000000000..85f53c4be --- /dev/null +++ b/interface/ztbmv.c @@ -0,0 +1,260 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTBMV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZTBMV " +#else +#define ERROR_NAME "CTBMV " +#endif + +static int (*tbmv[])(BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { /* 16 kernels, indexed below by (trans<<2) | (uplo<<1) | unit */ +#ifdef XDOUBLE + xtbmv_NUU, xtbmv_NUN, xtbmv_NLU, xtbmv_NLN, + xtbmv_TUU, xtbmv_TUN, xtbmv_TLU, xtbmv_TLN, + xtbmv_RUU, xtbmv_RUN, xtbmv_RLU, xtbmv_RLN, + xtbmv_CUU, xtbmv_CUN, xtbmv_CLU, xtbmv_CLN, +#elif defined(DOUBLE) + ztbmv_NUU, ztbmv_NUN, ztbmv_NLU, ztbmv_NLN, + ztbmv_TUU, ztbmv_TUN, ztbmv_TLU, ztbmv_TLN, + ztbmv_RUU, ztbmv_RUN, ztbmv_RLU, ztbmv_RLN, + ztbmv_CUU, ztbmv_CUN, ztbmv_CLU, ztbmv_CLN, +#else + ctbmv_NUU, ctbmv_NUN, ctbmv_NLU, ctbmv_NLN, + ctbmv_TUU, ctbmv_TUN, ctbmv_TLU, ctbmv_TLN, + ctbmv_RUU, ctbmv_RUN, ctbmv_RLU, ctbmv_RLN, + ctbmv_CUU, ctbmv_CUN, ctbmv_CLU, ctbmv_CLN, +#endif +}; + +#ifdef SMP +static int (*tbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { 
+#ifdef XDOUBLE + xtbmv_thread_NUU, xtbmv_thread_NUN, xtbmv_thread_NLU, xtbmv_thread_NLN, + xtbmv_thread_TUU, xtbmv_thread_TUN, xtbmv_thread_TLU, xtbmv_thread_TLN, + xtbmv_thread_RUU, xtbmv_thread_RUN, xtbmv_thread_RLU, xtbmv_thread_RLN, + xtbmv_thread_CUU, xtbmv_thread_CUN, xtbmv_thread_CLU, xtbmv_thread_CLN, +#elif defined(DOUBLE) + ztbmv_thread_NUU, ztbmv_thread_NUN, ztbmv_thread_NLU, ztbmv_thread_NLN, + ztbmv_thread_TUU, ztbmv_thread_TUN, ztbmv_thread_TLU, ztbmv_thread_TLN, + ztbmv_thread_RUU, ztbmv_thread_RUN, ztbmv_thread_RLU, ztbmv_thread_RLN, + ztbmv_thread_CUU, ztbmv_thread_CUN, ztbmv_thread_CLU, ztbmv_thread_CLN, +#else + ctbmv_thread_NUU, ctbmv_thread_NUN, ctbmv_thread_NLU, ctbmv_thread_NLN, + ctbmv_thread_TUU, ctbmv_thread_TUN, ctbmv_thread_TLU, ctbmv_thread_TLN, + ctbmv_thread_RUU, ctbmv_thread_RUN, ctbmv_thread_RLU, ctbmv_thread_RLN, + ctbmv_thread_CUU, ctbmv_thread_CUN, ctbmv_thread_CLU, ctbmv_thread_CLN, +#endif +}; +#endif + +#ifndef CBLAS + +void NAME(char *UPLO, char *TRANS, char *DIAG, + blasint *N, blasint *K, + FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){ /* pointer-argument entry: option characters and sizes are read through pointers */ + + char uplo_arg = *UPLO; + char trans_arg = *TRANS; + char diag_arg = *DIAG; + + blasint n = *N; + blasint k = *K; + blasint lda = *LDA; + blasint incx = *INCX; + + blasint info; + int uplo; + int unit; + int trans; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + TOUPPER(trans_arg); + TOUPPER(diag_arg); + + trans = -1; /* each selector stays -1 unless its option character is recognised */ + unit = -1; + uplo = -1; + + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 2; + if (trans_arg == 'C') trans = 3; + + if (diag_arg == 'U') unit = 0; /* note the encoding: 'U' maps to 0 and 'N' maps to 1 */ + if (diag_arg == 'N') unit = 1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (incx == 0) info = 9; /* checks run from the last argument to the first, so the lowest failing index is the one reported */ + if (lda < k + 1) info = 7; + if (k < 0) info = 5; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + 
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + blasint n, blasint k, FLOAT *a, blasint lda, FLOAT *x, blasint incx) { /* CBLAS entry: options arrive as enums, sizes by value */ + + int trans, uplo, unit; + blasint info; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_CNAME; + + unit = -1; + uplo = -1; + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 2; + if (TransA == CblasConjTrans) trans = 3; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 9; + if (lda < k + 1) info = 7; + if (k < 0) info = 5; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (order == CblasRowMajor) { /* row-major: uplo is swapped and each transpose code is paired with its opposite (0<->1, 2<->3) */ + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 3; + if (TransA == CblasConjTrans) trans = 2; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 9; + if (lda < k + 1) info = 7; + if (k < 0) info = 5; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { /* info stays 0 when order matched neither layout, so that case is reported too */ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * 2; /* negative stride: shift the base pointer by (n - 1) strides of 2 FLOATs each */ + + buffer = (FLOAT *)blas_memory_alloc(1); /* work buffer handed to the kernel; released below */ + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (tbmv[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer); + +#ifdef SMP + } else { + + 
(tbmv_thread[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * k / 2 + n, n * k); + + IDEBUG_END; + + return; +} diff --git a/interface/ztbsv.c b/interface/ztbsv.c new file mode 100644 index 000000000..3846a4b3d --- /dev/null +++ b/interface/ztbsv.c @@ -0,0 +1,219 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. 
*/ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTBSV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZTBSV " +#else +#define ERROR_NAME "CTBSV " +#endif + +static int (*tbsv[])(BLASLONG, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { /* 16 kernels, indexed below by (trans<<2) | (uplo<<1) | unit */ +#ifdef XDOUBLE + xtbsv_NUU, xtbsv_NUN, xtbsv_NLU, xtbsv_NLN, + xtbsv_TUU, xtbsv_TUN, xtbsv_TLU, xtbsv_TLN, + xtbsv_RUU, xtbsv_RUN, xtbsv_RLU, xtbsv_RLN, + xtbsv_CUU, xtbsv_CUN, xtbsv_CLU, xtbsv_CLN, +#elif defined(DOUBLE) + ztbsv_NUU, ztbsv_NUN, ztbsv_NLU, ztbsv_NLN, + ztbsv_TUU, ztbsv_TUN, ztbsv_TLU, ztbsv_TLN, + ztbsv_RUU, ztbsv_RUN, ztbsv_RLU, ztbsv_RLN, + ztbsv_CUU, ztbsv_CUN, ztbsv_CLU, ztbsv_CLN, +#else + ctbsv_NUU, ctbsv_NUN, ctbsv_NLU, ctbsv_NLN, + ctbsv_TUU, ctbsv_TUN, ctbsv_TLU, ctbsv_TLN, + ctbsv_RUU, ctbsv_RUN, ctbsv_RLU, ctbsv_RLN, + ctbsv_CUU, ctbsv_CUN, ctbsv_CLU, ctbsv_CLN, +#endif +}; + +#ifndef CBLAS + +void NAME(char *UPLO, char *TRANS, char *DIAG, + blasint *N, blasint *K, + FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){ /* pointer-argument entry: option characters and sizes are read through pointers */ + + char uplo_arg = *UPLO; + char trans_arg = *TRANS; + char diag_arg = *DIAG; + + blasint n = *N; + blasint k = *K; + blasint lda = *LDA; + blasint incx = *INCX; + + blasint info; + int uplo; + int unit; + int trans; + FLOAT *buffer; + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + TOUPPER(trans_arg); + TOUPPER(diag_arg); + + trans = -1; /* each selector stays -1 unless its option character is recognised */ + unit = -1; + uplo = -1; + + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 2; + if (trans_arg == 'C') trans = 3; + + if (diag_arg == 'U') unit = 0; + if (diag_arg == 'N') unit 
= 1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (incx == 0) info = 9; /* checks run from the last argument to the first, so the lowest failing index is the one reported */ + if (lda < k + 1) info = 7; + if (k < 0) info = 5; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + blasint n, blasint k, FLOAT *a, blasint lda, FLOAT *x, blasint incx) { /* CBLAS entry: options arrive as enums, sizes by value */ + + int trans, uplo, unit; + blasint info; + FLOAT *buffer; + + PRINT_DEBUG_CNAME; + + unit = -1; + uplo = -1; + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 2; + if (TransA == CblasConjTrans) trans = 3; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 9; + if (lda < k + 1) info = 7; + if (k < 0) info = 5; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (order == CblasRowMajor) { /* row-major: uplo is swapped and each transpose code is paired with its opposite (0<->1, 2<->3) */ + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 3; + if (TransA == CblasConjTrans) trans = 2; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 9; + if (lda < k + 1) info = 7; + if (k < 0) info = 5; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { /* info stays 0 when order matched neither layout, so that case is reported too */ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x 
-= (n - 1) * incx * 2; /* negative stride: shift the base pointer by (n - 1) strides of 2 FLOATs each */ + + buffer = (FLOAT *)blas_memory_alloc(1); /* work buffer handed to the kernel; released below */ + + (tbsv[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer); /* this routine has no threaded path: the kernel is always called directly */ + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * k / 2 + n, n * k); + + IDEBUG_END; + + return; +} diff --git a/interface/ztpmv.c b/interface/ztpmv.c new file mode 100644 index 000000000..2f9c48f5a --- /dev/null +++ b/interface/ztpmv.c @@ -0,0 +1,252 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTPMV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZTPMV " +#else +#define ERROR_NAME "CTPMV " +#endif + +static int (*tpmv[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, void *) = { /* 16 kernels, indexed below by (trans<<2) | (uplo<<1) | unit */ +#ifdef XDOUBLE + xtpmv_NUU, xtpmv_NUN, xtpmv_NLU, xtpmv_NLN, + xtpmv_TUU, xtpmv_TUN, xtpmv_TLU, xtpmv_TLN, + xtpmv_RUU, xtpmv_RUN, xtpmv_RLU, xtpmv_RLN, + xtpmv_CUU, xtpmv_CUN, xtpmv_CLU, xtpmv_CLN, +#elif defined(DOUBLE) + ztpmv_NUU, ztpmv_NUN, ztpmv_NLU, ztpmv_NLN, + ztpmv_TUU, ztpmv_TUN, ztpmv_TLU, ztpmv_TLN, + ztpmv_RUU, ztpmv_RUN, ztpmv_RLU, ztpmv_RLN, + ztpmv_CUU, ztpmv_CUN, ztpmv_CLU, ztpmv_CLN, +#else + ctpmv_NUU, ctpmv_NUN, ctpmv_NLU, ctpmv_NLN, + ctpmv_TUU, ctpmv_TUN, ctpmv_TLU, ctpmv_TLN, + ctpmv_RUU, ctpmv_RUN, ctpmv_RLU, ctpmv_RLN, + ctpmv_CUU, ctpmv_CUN, ctpmv_CLU, ctpmv_CLN, +#endif +}; + +#ifdef SMP +static int (*tpmv_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + xtpmv_thread_NUU, 
xtpmv_thread_NUN, xtpmv_thread_NLU, xtpmv_thread_NLN, + xtpmv_thread_TUU, xtpmv_thread_TUN, xtpmv_thread_TLU, xtpmv_thread_TLN, + xtpmv_thread_RUU, xtpmv_thread_RUN, xtpmv_thread_RLU, xtpmv_thread_RLN, + xtpmv_thread_CUU, xtpmv_thread_CUN, xtpmv_thread_CLU, xtpmv_thread_CLN, +#elif defined(DOUBLE) + ztpmv_thread_NUU, ztpmv_thread_NUN, ztpmv_thread_NLU, ztpmv_thread_NLN, + ztpmv_thread_TUU, ztpmv_thread_TUN, ztpmv_thread_TLU, ztpmv_thread_TLN, + ztpmv_thread_RUU, ztpmv_thread_RUN, ztpmv_thread_RLU, ztpmv_thread_RLN, + ztpmv_thread_CUU, ztpmv_thread_CUN, ztpmv_thread_CLU, ztpmv_thread_CLN, +#else + ctpmv_thread_NUU, ctpmv_thread_NUN, ctpmv_thread_NLU, ctpmv_thread_NLN, + ctpmv_thread_TUU, ctpmv_thread_TUN, ctpmv_thread_TLU, ctpmv_thread_TLN, + ctpmv_thread_RUU, ctpmv_thread_RUN, ctpmv_thread_RLU, ctpmv_thread_RLN, + ctpmv_thread_CUU, ctpmv_thread_CUN, ctpmv_thread_CLU, ctpmv_thread_CLN, +#endif +}; +#endif + +#ifndef CBLAS + +void NAME(char *UPLO, char *TRANS, char *DIAG, + blasint *N, FLOAT *a, FLOAT *x, blasint *INCX){ /* pointer-argument entry: option characters and sizes are read through pointers */ + + char uplo_arg = *UPLO; + char trans_arg = *TRANS; + char diag_arg = *DIAG; + + blasint n = *N; + blasint incx = *INCX; + + blasint info; + int uplo; + int unit; + int trans; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + TOUPPER(trans_arg); + TOUPPER(diag_arg); + + trans = -1; /* each selector stays -1 unless its option character is recognised */ + unit = -1; + uplo = -1; + + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 2; + if (trans_arg == 'C') trans = 3; + + if (diag_arg == 'U') unit = 0; + if (diag_arg == 'N') unit = 1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (incx == 0) info = 7; /* checks run from the last argument to the first, so the lowest failing index is the one reported */ + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum 
CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + blasint n, FLOAT *a, FLOAT *x, blasint incx) { + + int trans, uplo, unit; + blasint info; + FLOAT *buffer; +#ifdef SMP + int nthreads; /* declared with the other locals, before any statement, matching the other prologue */ +#endif + + PRINT_DEBUG_CNAME; + + unit = -1; + uplo = -1; + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 2; + if (TransA == CblasConjTrans) trans = 3; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 7; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (order == CblasRowMajor) { /* row-major: uplo is swapped and each transpose code is paired with its opposite (0<->1, 2<->3) */ + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 3; + if (TransA == CblasConjTrans) trans = 2; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 7; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { /* info stays 0 when order matched neither layout, so that case is reported too */ + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * 2; /* negative stride: shift the base pointer by (n - 1) strides of 2 FLOATs each */ + + buffer = (FLOAT *)blas_memory_alloc(1); /* work buffer handed to the kernel; released below */ + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (tpmv[(trans<<2) | (uplo<<1) | unit])(n, a, x, incx, buffer); + +#ifdef SMP + + } else { + + (tpmv_thread[(trans<<2) | (uplo<<1) | unit])(n, a, x, incx, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/ztpsv.c b/interface/ztpsv.c new file mode 100644 index 
000000000..fde500e37 --- /dev/null +++ b/interface/ztpsv.c @@ -0,0 +1,210 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTPSV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZTPSV " +#else +#define ERROR_NAME "CTPSV " +#endif + +static int (*tpsv[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, void *) = { +#ifdef XDOUBLE + xtpsv_NUU, xtpsv_NUN, xtpsv_NLU, xtpsv_NLN, + xtpsv_TUU, xtpsv_TUN, xtpsv_TLU, xtpsv_TLN, + xtpsv_RUU, xtpsv_RUN, xtpsv_RLU, xtpsv_RLN, + xtpsv_CUU, xtpsv_CUN, xtpsv_CLU, xtpsv_CLN, +#elif defined(DOUBLE) + ztpsv_NUU, ztpsv_NUN, ztpsv_NLU, ztpsv_NLN, + ztpsv_TUU, ztpsv_TUN, ztpsv_TLU, ztpsv_TLN, + ztpsv_RUU, ztpsv_RUN, ztpsv_RLU, ztpsv_RLN, + ztpsv_CUU, ztpsv_CUN, ztpsv_CLU, ztpsv_CLN, +#else + ctpsv_NUU, ctpsv_NUN, ctpsv_NLU, ctpsv_NLN, + ctpsv_TUU, ctpsv_TUN, ctpsv_TLU, ctpsv_TLN, + ctpsv_RUU, ctpsv_RUN, ctpsv_RLU, ctpsv_RLN, + ctpsv_CUU, ctpsv_CUN, ctpsv_CLU, ctpsv_CLN, +#endif +}; + +#ifndef CBLAS + +void NAME(char *UPLO, char *TRANS, char *DIAG, + blasint *N, FLOAT *a, FLOAT *x, blasint *INCX){ + + char uplo_arg = *UPLO; + char trans_arg = *TRANS; + char diag_arg = *DIAG; + + blasint n = *N; + blasint incx = *INCX; + + blasint info; + int uplo; + int unit; + int trans; + FLOAT *buffer; + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + TOUPPER(trans_arg); + TOUPPER(diag_arg); + + trans = -1; + unit = -1; + uplo = -1; + + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 2; + if (trans_arg == 'C') trans = 3; + + if (diag_arg == 'U') unit = 0; + if (diag_arg == 'N') unit = 1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (incx == 0) info = 7; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum 
CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + blasint n, FLOAT *a, FLOAT *x, blasint incx) { + + int trans, uplo, unit; + blasint info; + FLOAT *buffer; + + PRINT_DEBUG_CNAME; + + unit = -1; + uplo = -1; + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 2; + if (TransA == CblasConjTrans) trans = 3; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 7; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (order == CblasRowMajor) { + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 3; + if (TransA == CblasConjTrans) trans = 2; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 7; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * 2; + + buffer = (FLOAT *)blas_memory_alloc(1); + + (tpsv[(trans<<2) | (uplo<<1) | unit])(n, a, x, incx, buffer); + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/ztrmv.c b/interface/ztrmv.c new file mode 100644 index 000000000..5a18a85b1 --- /dev/null +++ b/interface/ztrmv.c @@ -0,0 +1,255 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of 
Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTRMV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZTRMV " +#else +#define ERROR_NAME "CTRMV " +#endif + +static int (*trmv[])(BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { +#ifdef XDOUBLE + xtrmv_NUU, xtrmv_NUN, xtrmv_NLU, xtrmv_NLN, + xtrmv_TUU, xtrmv_TUN, xtrmv_TLU, xtrmv_TLN, + xtrmv_RUU, xtrmv_RUN, xtrmv_RLU, xtrmv_RLN, + xtrmv_CUU, xtrmv_CUN, xtrmv_CLU, xtrmv_CLN, +#elif defined(DOUBLE) + ztrmv_NUU, ztrmv_NUN, ztrmv_NLU, ztrmv_NLN, + ztrmv_TUU, ztrmv_TUN, ztrmv_TLU, ztrmv_TLN, + ztrmv_RUU, ztrmv_RUN, ztrmv_RLU, ztrmv_RLN, + ztrmv_CUU, ztrmv_CUN, ztrmv_CLU, ztrmv_CLN, +#else + ctrmv_NUU, ctrmv_NUN, ctrmv_NLU, ctrmv_NLN, + ctrmv_TUU, ctrmv_TUN, ctrmv_TLU, ctrmv_TLN, + ctrmv_RUU, ctrmv_RUN, ctrmv_RLU, ctrmv_RLN, + ctrmv_CUU, ctrmv_CUN, ctrmv_CLU, ctrmv_CLN, +#endif +}; + +#ifdef SMP +static int (*trmv_thread[])(BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { +#ifdef XDOUBLE + xtrmv_thread_NUU, xtrmv_thread_NUN, xtrmv_thread_NLU, xtrmv_thread_NLN, + xtrmv_thread_TUU, xtrmv_thread_TUN, xtrmv_thread_TLU, xtrmv_thread_TLN, + xtrmv_thread_RUU, xtrmv_thread_RUN, xtrmv_thread_RLU, xtrmv_thread_RLN, + xtrmv_thread_CUU, xtrmv_thread_CUN, xtrmv_thread_CLU, xtrmv_thread_CLN, +#elif defined(DOUBLE) + ztrmv_thread_NUU, ztrmv_thread_NUN, ztrmv_thread_NLU, ztrmv_thread_NLN, + ztrmv_thread_TUU, ztrmv_thread_TUN, ztrmv_thread_TLU, ztrmv_thread_TLN, + ztrmv_thread_RUU, ztrmv_thread_RUN, ztrmv_thread_RLU, ztrmv_thread_RLN, + ztrmv_thread_CUU, ztrmv_thread_CUN, ztrmv_thread_CLU, ztrmv_thread_CLN, +#else + ctrmv_thread_NUU, ctrmv_thread_NUN, ctrmv_thread_NLU, ctrmv_thread_NLN, + ctrmv_thread_TUU, ctrmv_thread_TUN, ctrmv_thread_TLU, ctrmv_thread_TLN, + ctrmv_thread_RUU, ctrmv_thread_RUN, ctrmv_thread_RLU, ctrmv_thread_RLN, + 
ctrmv_thread_CUU, ctrmv_thread_CUN, ctrmv_thread_CLU, ctrmv_thread_CLN, +#endif +}; +#endif + +#ifndef CBLAS + +void NAME(char *UPLO, char *TRANS, char *DIAG, + blasint *N, FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){ + + char uplo_arg = *UPLO; + char trans_arg = *TRANS; + char diag_arg = *DIAG; + + blasint n = *N; + blasint lda = *LDA; + blasint incx = *INCX; + + blasint info; + int uplo; + int unit; + int trans; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + TOUPPER(trans_arg); + TOUPPER(diag_arg); + + trans = -1; + unit = -1; + uplo = -1; + + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 2; + if (trans_arg == 'C') trans = 3; + + if (diag_arg == 'U') unit = 0; + if (diag_arg == 'N') unit = 1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + info = 0; + + if (incx == 0) info = 8; + if (lda < MAX(1, n)) info = 6; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + blasint n, FLOAT *a, blasint lda, FLOAT *x, blasint incx) { + + int trans, uplo, unit; + blasint info; + FLOAT *buffer; +#ifdef SMP + int nthreads; +#endif + + PRINT_DEBUG_CNAME; + + unit = -1; + uplo = -1; + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 2; + if (TransA == CblasConjTrans) trans = 3; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 8; + if (lda < MAX(1, n)) info = 6; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if 
(trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (order == CblasRowMajor) { + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 3; + if (TransA == CblasConjTrans) trans = 2; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 8; + if (lda < MAX(1, n)) info = 6; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * 2; + + buffer = (FLOAT *)blas_memory_alloc(1); + +#ifdef SMP + nthreads = num_cpu_avail(2); + + if (nthreads == 1) { +#endif + + (trmv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer); + +#ifdef SMP + } else { + + (trmv_thread[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer, nthreads); + + } +#endif + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/ztrsv.c b/interface/ztrsv.c new file mode 100644 index 000000000..08f7dc68c --- /dev/null +++ b/interface/ztrsv.c @@ -0,0 +1,216 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. 
Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTRSV " +#elif defined(DOUBLE) +#define ERROR_NAME "ZTRSV " +#else +#define ERROR_NAME "CTRSV " +#endif + +static int (*trsv[])(BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = { +#ifdef XDOUBLE + xtrsv_NUU, xtrsv_NUN, xtrsv_NLU, xtrsv_NLN, + xtrsv_TUU, xtrsv_TUN, xtrsv_TLU, xtrsv_TLN, + xtrsv_RUU, xtrsv_RUN, xtrsv_RLU, xtrsv_RLN, + xtrsv_CUU, xtrsv_CUN, xtrsv_CLU, xtrsv_CLN, +#elif defined(DOUBLE) + ztrsv_NUU, ztrsv_NUN, ztrsv_NLU, ztrsv_NLN, + ztrsv_TUU, ztrsv_TUN, ztrsv_TLU, ztrsv_TLN, + ztrsv_RUU, ztrsv_RUN, ztrsv_RLU, ztrsv_RLN, + ztrsv_CUU, ztrsv_CUN, ztrsv_CLU, ztrsv_CLN, +#else + ctrsv_NUU, ctrsv_NUN, ctrsv_NLU, ctrsv_NLN, + ctrsv_TUU, ctrsv_TUN, ctrsv_TLU, ctrsv_TLN, + ctrsv_RUU, ctrsv_RUN, ctrsv_RLU, ctrsv_RLN, + ctrsv_CUU, ctrsv_CUN, ctrsv_CLU, ctrsv_CLN, +#endif +}; + +#ifndef CBLAS + +void NAME(char *UPLO, char *TRANS, char *DIAG, + blasint *N, FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){ + + char uplo_arg = *UPLO; + char trans_arg = *TRANS; + char diag_arg = *DIAG; + + blasint n = *N; + blasint lda = *LDA; + blasint incx = *INCX; + + blasint info; + int uplo; + int unit; + int trans; + FLOAT *buffer; + + PRINT_DEBUG_NAME; + + TOUPPER(uplo_arg); + TOUPPER(trans_arg); + TOUPPER(diag_arg); + + trans = -1; + unit = -1; + uplo = -1; + + if (trans_arg == 'N') trans = 0; + if (trans_arg == 'T') trans = 1; + if (trans_arg == 'R') trans = 2; + if (trans_arg == 'C') trans = 3; + + if (diag_arg == 'U') unit = 0; + if (diag_arg == 'N') unit = 1; + + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + + + info = 0; + + if (incx == 0) info = 8; + if (lda < MAX(1, n)) info = 6; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + + if (info != 0) { + 
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + +#else + +void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + blasint n, FLOAT *a, blasint lda, FLOAT *x, blasint incx) { + + int trans, uplo, unit; + blasint info; + FLOAT *buffer; + + PRINT_DEBUG_CNAME; + + unit = -1; + uplo = -1; + trans = -1; + info = 0; + + if (order == CblasColMajor) { + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + + if (TransA == CblasNoTrans) trans = 0; + if (TransA == CblasTrans) trans = 1; + if (TransA == CblasConjNoTrans) trans = 2; + if (TransA == CblasConjTrans) trans = 3; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 8; + if (lda < MAX(1, n)) info = 6; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (order == CblasRowMajor) { + if (Uplo == CblasUpper) uplo = 1; + if (Uplo == CblasLower) uplo = 0; + + if (TransA == CblasNoTrans) trans = 1; + if (TransA == CblasTrans) trans = 0; + if (TransA == CblasConjNoTrans) trans = 3; + if (TransA == CblasConjTrans) trans = 2; + + if (Diag == CblasUnit) unit = 0; + if (Diag == CblasNonUnit) unit = 1; + + info = -1; + + if (incx == 0) info = 8; + if (lda < MAX(1, n)) info = 6; + if (n < 0) info = 4; + if (unit < 0) info = 3; + if (trans < 0) info = 2; + if (uplo < 0) info = 1; + } + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + +#endif + + if (n == 0) return; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + + if (incx < 0 ) x -= (n - 1) * incx * 2; + + buffer = (FLOAT *)blas_memory_alloc(1); + + (trsv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer); + + blas_memory_free(buffer); + + FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n); + + IDEBUG_END; + + return; +} diff --git a/interface/ztrti2.c b/interface/ztrti2.c new file mode 100644 index 
000000000..017374c37 --- /dev/null +++ b/interface/ztrti2.c @@ -0,0 +1,134 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTRTI2" +#elif defined(DOUBLE) +#define ERROR_NAME "ZTRTI2" +#else +#define ERROR_NAME "CTRTI2" +#endif + +static blasint (*trti2[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { +#ifdef XDOUBLE + xtrti2_UU, xtrti2_UN, xtrti2_LU, xtrti2_LN, +#elif defined(DOUBLE) + ztrti2_UU, ztrti2_UN, ztrti2_LU, ztrti2_LN, +#else + ctrti2_UU, ctrti2_UN, ctrti2_LU, ctrti2_LN, +#endif + }; + +int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ + + blas_arg_t args; + + blasint uplo_arg = *UPLO; + blasint diag_arg = *DIAG; + blasint uplo, diag; + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + + TOUPPER(uplo_arg); + TOUPPER(diag_arg); + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + diag = -1; + if (diag_arg == 'U') diag = 0; + if (diag_arg == 'N') diag = 1; + + info = 0; + if (args.lda < MAX(1,args.n)) info = 5; + if (args.n < 0) info = 3; + if (diag < 0) info = 2; + if (uplo < 0) info = 1; + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + + if (args.n <= 0) return 0; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + +#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + + info = (trti2[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + + *Info = info; + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(1, .5 * args.n * args.n, + 2. * args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.)) + + 6. 
* args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.))); + + IDEBUG_END; + + return 0; +} diff --git a/interface/ztrtri.c b/interface/ztrtri.c new file mode 100644 index 000000000..89caf80d3 --- /dev/null +++ b/interface/ztrtri.c @@ -0,0 +1,154 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. 
*/ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifdef XDOUBLE +#define ERROR_NAME "XTRTRI" +#elif defined(DOUBLE) +#define ERROR_NAME "ZTRTRI" +#else +#define ERROR_NAME "CTRTRI" +#endif + +static blasint (*trtri_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + TRTRI_UU_SINGLE, TRTRI_UN_SINGLE, TRTRI_LU_SINGLE, TRTRI_LN_SINGLE, +}; + +#ifdef SMP +static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={ + TRTRI_UU_PARALLEL, TRTRI_UN_PARALLEL, TRTRI_LU_PARALLEL, TRTRI_LN_PARALLEL, +}; +#endif + +int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ + + blas_arg_t args; + + blasint uplo_arg = *UPLO; + blasint diag_arg = *DIAG; + blasint uplo, diag; + blasint info; + FLOAT *buffer; +#ifdef PPC440 + extern +#endif + FLOAT *sa, *sb; + + PRINT_DEBUG_NAME; + + args.n = *N; + args.a = (void *)a; + args.lda = *ldA; + + TOUPPER(uplo_arg); + TOUPPER(diag_arg); + + uplo = -1; + if (uplo_arg == 'U') uplo = 0; + if (uplo_arg == 'L') uplo = 1; + diag = -1; + if (diag_arg == 'U') diag = 0; + if (diag_arg == 'N') diag = 1; + + info = 0; + if (args.lda < MAX(1,args.n)) info = 5; + if (args.n < 0) info = 3; + if (diag < 0) info = 2; + if (uplo < 0) info = 1; + if (info) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + *Info = - info; + return 0; + } + + *Info = 0; + + if (args.n == 0) return 0; + + if (diag) { + if (AMIN_K(args.n, args.a, args.lda + 1) == ZERO) { + *Info = IAMIN_K(args.n, args.a, args.lda + 1); + return 0; + } + } + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + 
+#ifndef PPC440 + buffer = (FLOAT *)blas_memory_alloc(1); + + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif + +#ifdef SMP + args.common = NULL; + args.nthreads = num_cpu_avail(4); + + if (args.nthreads == 1) { +#endif + + *Info = (trtri_single[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + +#ifdef SMP + } else { + + *Info = (trtri_parallel[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0); + + } +#endif + +#ifndef PPC440 + blas_memory_free(buffer); +#endif + + FUNCTION_PROFILE_END(1, .5 * args.n * args.n, + 2. * args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.)) + + 6. * args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.))); + + IDEBUG_END; + + return 0; +} -- cgit v1.2.3