summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Koehler <grisuthedragon@users.noreply.github.com>2015-02-16 13:46:20 +0100
committerMartin Koehler <grisuthedragon@users.noreply.github.com>2015-02-16 13:46:20 +0100
commit39cc6b21d3e282e8b91ffe4255e02e586885db2c (patch)
treeedf2051316a5dc2b8198e41dbe59acdda0c8cd08
parent771b18ae9c3aa70f4a2497b59700e06819a6ae30 (diff)
downloadopenblas-39cc6b21d3e282e8b91ffe4255e02e586885db2c.tar.gz
openblas-39cc6b21d3e282e8b91ffe4255e02e586885db2c.tar.bz2
openblas-39cc6b21d3e282e8b91ffe4255e02e586885db2c.zip
Add ATLAS-style ?geadd function
-rw-r--r--cblas.h10
-rw-r--r--cblas_noconst.h10
-rw-r--r--common_c.h2
-rw-r--r--common_d.h3
-rw-r--r--common_interface.h6
-rw-r--r--common_level3.h5
-rw-r--r--common_macro.h6
-rw-r--r--common_param.h4
-rw-r--r--common_s.h2
-rw-r--r--common_z.h3
-rw-r--r--exports/gensymbol4
-rw-r--r--interface/Makefile49
-rw-r--r--interface/geadd.c148
-rw-r--r--interface/zgeadd.c146
-rw-r--r--kernel/Makefile.L340
-rw-r--r--kernel/arm/geadd.c64
-rw-r--r--kernel/arm/zgeadd.c65
17 files changed, 553 insertions, 14 deletions
diff --git a/cblas.h b/cblas.h
index a21863d88..d6949e10c 100644
--- a/cblas.h
+++ b/cblas.h
@@ -347,6 +347,16 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
+void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
+ float *c, OPENBLAS_CONST blasint cldc);
+void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
+ double *c, OPENBLAS_CONST blasint cldc);
+void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
+ float *c, OPENBLAS_CONST blasint cldc);
+void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
+ double *c, OPENBLAS_CONST blasint cldc);
+
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/cblas_noconst.h b/cblas_noconst.h
index f6a6baf62..4451c304e 100644
--- a/cblas_noconst.h
+++ b/cblas_noconst.h
@@ -333,6 +333,16 @@ void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, bl
blasint clda, blasint cldb);
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
blasint clda, blasint cldb);
+
+void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
+ float *c, blasint cldc);
+void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
+ double *c, blasint cldc);
+void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
+ float *c, blasint cldc);
+void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
+ double *c, blasint cldc);
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/common_c.h b/common_c.h
index 724d1e261..741d7d087 100644
--- a/common_c.h
+++ b/common_c.h
@@ -220,6 +220,7 @@
#define COMATCOPY_K_CTC comatcopy_k_ctc
#define COMATCOPY_K_RTC comatcopy_k_rtc
+#define CGEADD_K cgeadd_k
#else
@@ -402,6 +403,7 @@
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
+#define CGEADD_K gotoblas -> cgeadd_k
#endif
diff --git a/common_d.h b/common_d.h
index c34e1f28f..d6dfd7f04 100644
--- a/common_d.h
+++ b/common_d.h
@@ -149,6 +149,7 @@
#define DOMATCOPY_K_RN domatcopy_k_rn
#define DOMATCOPY_K_CT domatcopy_k_ct
#define DOMATCOPY_K_RT domatcopy_k_rt
+#define DGEADD_K dgeadd_k
#else
@@ -267,6 +268,8 @@
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
+#define DGEADD_K gotoblas -> dgeadd_k
+
#endif
#define DGEMM_NN dgemm_nn
diff --git a/common_interface.h b/common_interface.h
index ddd2cf6e5..15f69e02f 100644
--- a/common_interface.h
+++ b/common_interface.h
@@ -754,6 +754,12 @@ void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, do
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
+void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
+void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
+void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
+void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
+
+
#ifdef __cplusplus
}
diff --git a/common_level3.h b/common_level3.h
index 0babd45b7..e0ecbc4e2 100644
--- a/common_level3.h
+++ b/common_level3.h
@@ -1762,6 +1762,11 @@ int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, dou
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
+int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
+int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG);
+int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG);
+
#ifdef __CUDACC__
}
diff --git a/common_macro.h b/common_macro.h
index f9de3773a..8555baa67 100644
--- a/common_macro.h
+++ b/common_macro.h
@@ -634,7 +634,7 @@
#define OMATCOPY_K_RN DOMATCOPY_K_RN
#define OMATCOPY_K_CT DOMATCOPY_K_CT
#define OMATCOPY_K_RT DOMATCOPY_K_RT
-
+#define GEADD_K DGEADD_K
#else
#define AMAX_K SAMAX_K
@@ -932,6 +932,7 @@
#define OMATCOPY_K_CT SOMATCOPY_K_CT
#define OMATCOPY_K_RT SOMATCOPY_K_RT
+#define GEADD_K SGEADD_K
#endif
#else
#ifdef XDOUBLE
@@ -1746,6 +1747,7 @@
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
+#define GEADD_K ZGEADD_K
#else
@@ -2159,6 +2161,8 @@
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
+#define GEADD_K CGEADD_K
+
#endif
#endif
diff --git a/common_param.h b/common_param.h
index 49c1bf73b..1b56e85f0 100644
--- a/common_param.h
+++ b/common_param.h
@@ -855,6 +855,10 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+ int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
+ int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
+ int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
+ int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); /* alpha and beta are (real, imaginary) double pairs, as in the zgeadd_k prototype */
} gotoblas_t;
diff --git a/common_s.h b/common_s.h
index 4e9b6dbe7..a4d8679b7 100644
--- a/common_s.h
+++ b/common_s.h
@@ -153,6 +153,7 @@
#define SOMATCOPY_K_CT somatcopy_k_ct
#define SOMATCOPY_K_RT somatcopy_k_rt
+#define SGEADD_K sgeadd_k
#else
@@ -274,6 +275,7 @@
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
+#define SGEADD_K gotoblas -> sgeadd_k
#endif
diff --git a/common_z.h b/common_z.h
index 133dea80c..85f577a27 100644
--- a/common_z.h
+++ b/common_z.h
@@ -220,6 +220,7 @@
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
+#define ZGEADD_K zgeadd_k
#else
@@ -403,6 +404,8 @@
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
+#define ZGEADD_K gotoblas -> zgeadd_k /* dispatched through the gotoblas table, like the neighbouring macros in this branch */
+
#endif
#define ZGEMM_NN zgemm_nn
diff --git a/exports/gensymbol b/exports/gensymbol
index 2155f801f..12ca7376c 100644
--- a/exports/gensymbol
+++ b/exports/gensymbol
@@ -23,7 +23,8 @@
zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv,
ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv, zsymv,
xerbla,
- saxpby,daxpby,caxpby,zaxpby
+ saxpby,daxpby,caxpby,zaxpby,
+ sgeadd,dgeadd,cgeadd,zgeadd,
);
@cblasobjs = (
@@ -55,6 +56,7 @@
cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby,
cblas_somatcopy, cblas_domatcopy, cblas_comatcopy, cblas_zomatcopy,
cblas_simatcopy, cblas_dimatcopy, cblas_cimatcopy, cblas_zimatcopy,
+ cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd
);
@exblasobjs = (
diff --git a/interface/Makefile b/interface/Makefile
index 54699b7e3..1666d9145 100644
--- a/interface/Makefile
+++ b/interface/Makefile
@@ -43,7 +43,8 @@ SBLAS2OBJS = \
SBLAS3OBJS = \
sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \
strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \
- somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)
+ somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
+ sgeadd.$(SUFFIX)
DBLAS1OBJS = \
@@ -68,7 +69,8 @@ DBLAS2OBJS = \
DBLAS3OBJS = \
dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \
dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \
- domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)
+ domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\
+ dgeadd.$(SUFFIX)
CBLAS1OBJS = \
caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \
@@ -96,7 +98,8 @@ CBLAS3OBJS = \
cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \
ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \
chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \
- comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)
+ comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\
+ cgeadd.$(SUFFIX)
ZBLAS1OBJS = \
zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \
@@ -124,7 +127,8 @@ ZBLAS3OBJS = \
zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \
ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
- zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)
+ zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\
+ zgeadd.$(SUFFIX)
ifeq ($(SUPPORT_GEMM3M), 1)
@@ -269,7 +273,8 @@ CSBLAS2OBJS = \
CSBLAS3OBJS = \
cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \
- cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)
+ cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
+ cblas_sgeadd.$(SUFFIX)
CDBLAS1OBJS = \
cblas_idamax.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \
@@ -285,7 +290,8 @@ CDBLAS2OBJS = \
CDBLAS3OBJS += \
cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \
- cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX)
+ cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \
+ cblas_dgeadd.$(SUFFIX)
CCBLAS1OBJS = \
cblas_icamax.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \
@@ -308,7 +314,9 @@ CCBLAS3OBJS = \
cblas_cgemm.$(SUFFIX) cblas_csymm.$(SUFFIX) cblas_ctrmm.$(SUFFIX) cblas_ctrsm.$(SUFFIX) \
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \
- cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)
+ cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\
+ cblas_cgeadd.$(SUFFIX)
+
CZBLAS1OBJS = \
@@ -332,7 +340,9 @@ CZBLAS3OBJS = \
cblas_zgemm.$(SUFFIX) cblas_zsymm.$(SUFFIX) cblas_ztrmm.$(SUFFIX) cblas_ztrsm.$(SUFFIX) \
cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \
cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\
- cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX)
+ cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
+ cblas_zgeadd.$(SUFFIX)
+
ifeq ($(SUPPORT_GEMM3M), 1)
@@ -2103,4 +2113,27 @@ zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c
cblas_zimatcopy.$(SUFFIX) cblas_zimatcopy.$(PSUFFIX) : zimatcopy.c
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
+sgeadd.$(SUFFIX) sgeadd.$(PSUFFIX) : geadd.c
+ $(CC) -c $(CFLAGS) $< -o $(@F)
+
+dgeadd.$(SUFFIX) dgeadd.$(PSUFFIX) : geadd.c
+ $(CC) -c $(CFLAGS) $< -o $(@F)
+
+cgeadd.$(SUFFIX) cgeadd.$(PSUFFIX) : zgeadd.c
+ $(CC) -c $(CFLAGS) $< -o $(@F)
+
+zgeadd.$(SUFFIX) zgeadd.$(PSUFFIX) : zgeadd.c
+ $(CC) -c $(CFLAGS) $< -o $(@F)
+
+cblas_sgeadd.$(SUFFIX) cblas_sgeadd.$(PSUFFIX) : geadd.c
+ $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
+
+cblas_dgeadd.$(SUFFIX) cblas_dgeadd.$(PSUFFIX) : geadd.c
+ $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
+
+cblas_cgeadd.$(SUFFIX) cblas_cgeadd.$(PSUFFIX) : zgeadd.c
+ $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
+
+cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c
+ $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
diff --git a/interface/geadd.c b/interface/geadd.c
new file mode 100644
index 000000000..f0befa14a
--- /dev/null
+++ b/interface/geadd.c
@@ -0,0 +1,148 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#if defined(DOUBLE)
+#define ERROR_NAME "DGEADD "
+#else
+#define ERROR_NAME "SGEADD "
+#endif
+
+#ifndef CBLAS
+
+/*
+ * Fortran interface to the ATLAS-style GEADD operation; the update of c
+ * (intended as C := alpha * A + beta * C) is delegated to GEADD_K.
+ *
+ * M, N        dimensions, passed by reference; a zero dimension is a no-op.
+ * ALPHA, BETA scalars, passed by reference.
+ * a, LDA      first matrix and its leading dimension, LDA >= max(1, M).
+ * c, LDC      second matrix and its leading dimension, LDC >= max(1, M).
+ *
+ * An invalid argument is reported to xerbla by its 1-based position and the
+ * call returns before GEADD_K is reached.
+ */
+void NAME(blasint *M, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
+          FLOAT *BETA, FLOAT *c, blasint *LDC)
+{
+  blasint m    = *M;
+  blasint n    = *N;
+  blasint lda  = *LDA;
+  blasint ldc  = *LDC;
+  FLOAT alpha  = *ALPHA;
+  FLOAT beta   = *BETA;
+  blasint info = 0;
+
+  PRINT_DEBUG_NAME;
+
+  /* Checked from the last argument to the first so that the lowest-numbered
+   * invalid argument is the one reported.  LDA is argument 5, LDC is 8. */
+  if (ldc < MAX(1, m)) info = 8;
+  if (lda < MAX(1, m)) info = 5;
+  if (n < 0)           info = 2;
+  if (m < 0)           info = 1;
+
+  if (info != 0){
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+/*
+ * CBLAS interface to the same operation, with the scalars passed by value.
+ * For CblasRowMajor, m and n are exchanged before validation so that the
+ * kernel receives the row-major data as its transposed column-major view.
+ */
+void CNAME( enum CBLAS_ORDER order, blasint m, blasint n, FLOAT alpha, FLOAT *a, blasint lda, FLOAT beta,
+            FLOAT *c, blasint ldc)
+{
+  blasint info, t;
+
+  PRINT_DEBUG_CNAME;
+
+  /* Remains 0, and is reported as such, when order is neither layout. */
+  info = 0;
+
+  if (order == CblasColMajor) {
+    info = -1;
+    if (ldc < MAX(1, m)) info = 8;
+    if (lda < MAX(1, m)) info = 5;
+    if (n < 0)           info = 2;
+    if (m < 0)           info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    info = -1;
+    t = n;
+    n = m;
+    m = t;
+
+    if (ldc < MAX(1, m)) info = 8;
+    if (lda < MAX(1, m)) info = 5;
+    if (n < 0)           info = 2;
+    if (m < 0)           info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* Nothing to update for an empty matrix. */
+  if ((m==0) || (n==0)) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  GEADD_K(m,n,alpha, a, lda, beta, c, ldc);
+
+  FUNCTION_PROFILE_END(1, 2* m * n , 2 * m * n);
+
+  IDEBUG_END;
+
+  return;
+
+}
diff --git a/interface/zgeadd.c b/interface/zgeadd.c
new file mode 100644
index 000000000..7124cf230
--- /dev/null
+++ b/interface/zgeadd.c
@@ -0,0 +1,146 @@
+/*********************************************************************/
+/* Copyright 2009, 2010 The University of Texas at Austin. */
+/* All rights reserved. */
+/* */
+/* Redistribution and use in source and binary forms, with or */
+/* without modification, are permitted provided that the following */
+/* conditions are met: */
+/* */
+/* 1. Redistributions of source code must retain the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer. */
+/* */
+/* 2. Redistributions in binary form must reproduce the above */
+/* copyright notice, this list of conditions and the following */
+/* disclaimer in the documentation and/or other materials */
+/* provided with the distribution. */
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
+/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
+/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
+/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
+/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
+/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
+/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
+/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
+/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
+/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
+/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+/* POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* The views and conclusions contained in the software and */
+/* documentation are those of the authors and should not be */
+/* interpreted as representing official policies, either expressed */
+/* or implied, of The University of Texas at Austin. */
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#if defined(DOUBLE)
+#define ERROR_NAME "ZGEADD "
+#else
+#define ERROR_NAME "CGEADD "
+#endif
+
+#ifndef CBLAS
+
+/*
+ * Fortran interface to the complex ATLAS-style GEADD operation; the update
+ * of c (intended as C := alpha * A + beta * C) is delegated to GEADD_K.
+ *
+ * M, N        dimensions, passed by reference; a zero dimension is a no-op.
+ * ALPHA, BETA complex scalars, forwarded as ALPHA[0], ALPHA[1], BETA[0], BETA[1].
+ * a, LDA      first matrix and its leading dimension, LDA >= max(1, M).
+ * c, LDC      second matrix and its leading dimension, LDC >= max(1, M).
+ *
+ * An invalid argument is reported to xerbla by its 1-based position and the
+ * call returns before GEADD_K is reached.
+ */
+void NAME(blasint *M, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
+          FLOAT *BETA, FLOAT *c, blasint *LDC)
+{
+  blasint m    = *M;
+  blasint n    = *N;
+  blasint lda  = *LDA;
+  blasint ldc  = *LDC;
+  blasint info = 0;
+
+  PRINT_DEBUG_NAME;
+
+  /* Checked from the last argument to the first so that the lowest-numbered
+   * invalid argument is the one reported.  LDA is argument 5, LDC is 8. */
+  if (ldc < MAX(1, m)) info = 8;
+  if (lda < MAX(1, m)) info = 5;
+  if (n < 0)           info = 2;
+  if (m < 0)           info = 1;
+
+  if (info != 0){
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#else
+/*
+ * CBLAS interface to the same operation; ALPHA and BETA each point to a pair
+ * of FLOATs.  For CblasRowMajor, m and n are exchanged before validation so
+ * that the kernel receives the data as its transposed column-major view.
+ */
+void CNAME( enum CBLAS_ORDER order, blasint m, blasint n, FLOAT *ALPHA, FLOAT *a, blasint lda, FLOAT *BETA,
+            FLOAT *c, blasint ldc)
+{
+  blasint info, t;
+
+  PRINT_DEBUG_CNAME;
+
+  /* Remains 0, and is reported as such, when order is neither layout. */
+  info = 0;
+
+  if (order == CblasColMajor) {
+    info = -1;
+    if (ldc < MAX(1, m)) info = 8;
+    if (lda < MAX(1, m)) info = 5;
+    if (n < 0)           info = 2;
+    if (m < 0)           info = 1;
+  }
+
+  if (order == CblasRowMajor) {
+    info = -1;
+    t = n;
+    n = m;
+    m = t;
+
+    if (ldc < MAX(1, m)) info = 8;
+    if (lda < MAX(1, m)) info = 5;
+    if (n < 0)           info = 2;
+    if (m < 0)           info = 1;
+  }
+
+  if (info >= 0) {
+    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+    return;
+  }
+
+#endif
+
+  /* Nothing to update for an empty matrix. */
+  if ((m==0) || (n==0)) return;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  GEADD_K(m,n,ALPHA[0],ALPHA[1], a, lda, BETA[0], BETA[1], c, ldc);
+
+  FUNCTION_PROFILE_END(1, 2* m * n , 2 * m * n);
+
+  IDEBUG_END;
+
+  return;
+
+}
diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index 5702b7ac8..a3ccc19a9 100644
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3
@@ -329,23 +329,27 @@ endif
###### BLAS extensions #####
SBLASOBJS += \
somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
- somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX)
+ somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
+ sgeadd_k$(TSUFFIX).$(SUFFIX)
DBLASOBJS += \
domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
- domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX)
+ domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
+ dgeadd_k$(TSUFFIX).$(SUFFIX)
CBLASOBJS += \
comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
- comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX)
+ comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
+ cgeadd_k$(TSUFFIX).$(SUFFIX)
ZBLASOBJS += \
zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
- zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX)
+ zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
+ zgeadd_k$(TSUFFIX).$(SUFFIX)
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
@@ -3440,3 +3444,31 @@ $(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC)
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@
+ifndef SGEADD_K
+SGEADD_K = ../arm/geadd.c
+endif
+
+$(KDIR)sgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K)
+ $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@
+
+ifndef DGEADD_K
+DGEADD_K = ../arm/geadd.c
+endif
+
+$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K)
+ $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
+
+ifndef CGEADD_K
+CGEADD_K = ../arm/zgeadd.c
+endif
+
+$(KDIR)cgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEADD_K)
+ $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM $< -o $@
+
+ifndef ZGEADD_K
+ZGEADD_K = ../arm/zgeadd.c
+endif
+
+$(KDIR)zgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEADD_K)
+ $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM $< -o $@
+
diff --git a/kernel/arm/geadd.c b/kernel/arm/geadd.c
new file mode 100644
index 000000000..062918b8c
--- /dev/null
+++ b/kernel/arm/geadd.c
@@ -0,0 +1,64 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+
+
+/* Generic GEADD kernel.  Visits the cols columns of a and b (column j starts
+ * at a + j*lda and b + j*ldb) and hands rows elements of each to AXPBY_K with
+ * alpha and beta.  When alpha compares equal to zero, a is not read and each
+ * column of b is passed to SCAL_K with beta instead.  Always returns 0. */
+int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT beta, FLOAT *b, BLASLONG ldb)
+{
+  BLASLONG col;
+  FLOAT *src = a;
+  FLOAT *dst = b;
+
+  /* Non-positive extents leave b untouched. */
+  if (rows <= 0 || cols <= 0) {
+    return 0;
+  }
+
+  if (alpha == 0.0) {
+    /* Only b is touched on this path. */
+    for (col = 0; col < cols; col++, dst += ldb) {
+      SCAL_K(rows, 0, 0, beta, dst, 1, NULL, 0, NULL, 0);
+    }
+
+    return 0;
+  }
+
+  /* General case: one AXPBY_K call per column pair. */
+  for (col = 0; col < cols; col++, src += lda, dst += ldb) {
+    AXPBY_K(rows, alpha, src, 1, beta, dst, 1);
+  }
+
+  return 0;
+
+}
+
+
diff --git a/kernel/arm/zgeadd.c b/kernel/arm/zgeadd.c
new file mode 100644
index 000000000..66a7641d6
--- /dev/null
+++ b/kernel/arm/zgeadd.c
@@ -0,0 +1,65 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+
+
+/* Generic complex GEADD kernel.  Visits the cols columns of a and b and hands
+ * rows elements of each to AXPBY_K with (alphar, alphai) and (betar, betai).
+ * When both parts of alpha compare equal to zero, a is not read and each
+ * column of b is passed to SCAL_K with beta instead.  Always returns 0. */
+int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alphar, FLOAT alphai, FLOAT *a, BLASLONG lda, FLOAT betar, FLOAT betai , FLOAT *b, BLASLONG ldb)
+{
+  BLASLONG col, a_stride, b_stride;
+  FLOAT *src = a;
+  FLOAT *dst = b;
+
+  if (rows <= 0 || cols <= 0) {
+    return 0;
+  }
+
+  /* Column strides in FLOATs are twice lda/ldb (presumably re/im pairs). */
+  a_stride = 2 * lda;
+  b_stride = 2 * ldb;
+
+  if (alphar == 0.0 && alphai == 0.0) {
+    /* Only b is touched on this path. */
+    for (col = 0; col < cols; col++, dst += b_stride) {
+      SCAL_K(rows, 0, 0, betar, betai, dst, 1, NULL, 0, NULL, 0);
+    }
+
+    return 0;
+  }
+
+  /* General case: one AXPBY_K call per column pair. */
+  for (col = 0; col < cols; col++, src += a_stride, dst += b_stride) {
+    AXPBY_K(rows, alphar, alphai, src, 1, betar, betai, dst, 1);
+  }
+  return 0;
+}
+
+