diff options
author | wernsaar <wernsaar@googlemail.com> | 2014-06-10 16:14:34 +0200 |
---|---|---|
committer | wernsaar <wernsaar@googlemail.com> | 2014-06-10 16:14:34 +0200 |
commit | faeab93df04ec855b7b72b21714e92da1facb4fc (patch) | |
tree | 4b5f7addbd743bb194d0f684b871b864f3297bd8 | |
parent | cee257f38432c2cfa75449822e448c02a4417033 (diff) | |
download | openblas-faeab93df04ec855b7b72b21714e92da1facb4fc.tar.gz openblas-faeab93df04ec855b7b72b21714e92da1facb4fc.tar.bz2 openblas-faeab93df04ec855b7b72b21714e92da1facb4fc.zip |
Ref #51: added blas extensions simatcopy, dimatcopy, cimatcopy, zimatcopy
-rw-r--r-- | common_interface.h | 5 | ||||
-rw-r--r-- | interface/Makefile | 26 | ||||
-rw-r--r-- | interface/imatcopy.c | 142 | ||||
-rw-r--r-- | interface/zimatcopy.c | 185 |
4 files changed, 351 insertions, 7 deletions
diff --git a/common_interface.h b/common_interface.h index 0311e6776..2cc1619ff 100644 --- a/common_interface.h +++ b/common_interface.h @@ -769,6 +769,11 @@ void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, do void BLASFUNC(comatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *); void BLASFUNC(zomatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *); +void BLASFUNC(simatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *); +void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *); +void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *); +void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *); + #ifdef __cplusplus } diff --git a/interface/Makefile b/interface/Makefile index da7d11dd4..51f9937b8 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -41,7 +41,7 @@ SBLAS2OBJS = \ SBLAS3OBJS = \ sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \ strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \ - somatcopy.$(SUFFIX) + somatcopy.$(SUFFIX) simatcopy.$(SUFFIX) DBLAS1OBJS = \ @@ -66,7 +66,7 @@ DBLAS2OBJS = \ DBLAS3OBJS = \ dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \ - domatcopy.$(SUFFIX) + domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX) CBLAS1OBJS = \ caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ @@ -94,7 +94,7 @@ CBLAS3OBJS = \ cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \ ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \ chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \ - comatcopy.$(SUFFIX) + comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX) ZBLAS1OBJS = \ zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ @@ -122,7 +122,7 @@ ZBLAS3OBJS = \ zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \ ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) 
zsyr2k.$(SUFFIX) \ zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ - zomatcopy.$(SUFFIX) + zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) ifdef SUPPORT_GEMM3M @@ -2032,13 +2032,25 @@ cblas_caxpby.$(SUFFIX) cblas_caxpby.$(PSUFFIX) : zaxpby.c domatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c $(CC) -c $(CFLAGS) $< -o $(@F) -somatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c +somatcopy.$(SUFFIX) somatcopy.$(PSUFFIX) : omatcopy.c $(CC) -c $(CFLAGS) $< -o $(@F) -comatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : zomatcopy.c +comatcopy.$(SUFFIX) comatcopy.$(PSUFFIX) : zomatcopy.c $(CC) -c $(CFLAGS) $< -o $(@F) -zomatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : zomatcopy.c +zomatcopy.$(SUFFIX) zomatcopy.$(PSUFFIX) : zomatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dimatcopy.$(SUFFIX) dimatcopy.$(PSUFFIX) : imatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +simatcopy.$(SUFFIX) simatcopy.$(PSUFFIX) : imatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cimatcopy.$(SUFFIX) cimatcopy.$(PSUFFIX) : zimatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c $(CC) -c $(CFLAGS) $< -o $(@F) diff --git a/interface/imatcopy.c b/interface/imatcopy.c new file mode 100644 index 000000000..4a86d83cd --- /dev/null +++ b/interface/imatcopy.c @@ -0,0 +1,142 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. 
Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/***********************************************************
+ * 2014/06/10 Saar
+***********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#if defined(DOUBLE)
+#define ERROR_NAME "DIMATCOPY"
+#else
+#define ERROR_NAME "SIMATCOPY"
+#endif
+
+#define BlasRowMajor 0
+#define BlasColMajor 1
+#define BlasNoTrans 0
+#define BlasTrans 1
+
+#undef malloc
+#undef free
+
+/***********************************************************
+ * In-place scaled copy / transpose of a real matrix
+ * (SIMATCOPY, or DIMATCOPY when DOUBLE is defined).
+ *
+ * ORDER       'C' = column major, 'R' = row major (case-insensitive).
+ * TRANS       'N' or 'R' = no transpose, 'T' or 'C' = transpose.
+ * rows, cols  dimensions of the input matrix, both must be > 0.
+ * alpha       scale factor handed to the first kernel call.
+ * a           matrix storage, overwritten with the result.
+ * lda         leading dimension of the input layout of a.
+ * ldb         leading dimension of the result layout of a.
+ *
+ * Invalid arguments are reported through xerbla and a is left
+ * untouched. The reported info is 1 (ORDER), 2 (TRANS), 3 (rows),
+ * 4 (cols), 7 (lda) or 9 (ldb); when several checks fail the
+ * lowest number wins because it is assigned last.
+ *
+ * The operation is done out of place: a kernel writes the scaled
+ * result into a scratch buffer, which is then copied back over a
+ * with leading dimension ldb. If the scratch buffer cannot be
+ * allocated a message is printed and the process exits.
+ *
+ * NOTE(review): the kernel argument order is assumed to be
+ * (rows, cols, alpha, src, ld_src, dst, ld_dst) - confirm against
+ * the OMATCOPY_K_* definitions.
+***********************************************************/
+void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
+{
+
+	char Order, Trans;
+	int order=-1,trans=-1;
+	blasint info = -1;
+	FLOAT *b;
+	size_t msize;
+
+	Order = *ORDER;
+	Trans = *TRANS;
+
+	TOUPPER(Order);
+	TOUPPER(Trans);
+
+	if ( Order == 'C' ) order = BlasColMajor;
+	if ( Order == 'R' ) order = BlasRowMajor;
+	if ( Trans == 'N' ) trans = BlasNoTrans;
+	if ( Trans == 'R' ) trans = BlasNoTrans;
+	if ( Trans == 'T' ) trans = BlasTrans;
+	if ( Trans == 'C' ) trans = BlasTrans;
+
+	/* ldb must cover the leading extent of the result layout */
+	if ( order == BlasColMajor)
+	{
+		if ( trans == BlasNoTrans  &&  *ldb < *rows ) info = 9;
+		if ( trans == BlasTrans    &&  *ldb < *cols ) info = 9;
+	}
+	if ( order == BlasRowMajor)
+	{
+		if ( trans == BlasNoTrans  &&  *ldb < *cols ) info = 9;
+		if ( trans == BlasTrans    &&  *ldb < *rows ) info = 9;
+	}
+
+	/* checked from last argument to first so the lowest info is kept */
+	if ( order == BlasColMajor &&  *lda < *rows ) info = 7;
+	if ( order == BlasRowMajor &&  *lda < *cols ) info = 7;
+	if ( *cols <= 0 ) info = 4;
+	if ( *rows <= 0 ) info = 3;
+	if ( trans < 0 )  info = 2;
+	if ( order < 0 )  info = 1;
+
+	if (info >= 0) {
+		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+		return;
+	}
+
+	/*
+	 * The result needs ldb elements per output column (column major)
+	 * or per output row (row major), and there are at most
+	 * max(rows, cols) of those, so this bound is sufficient for every
+	 * ORDER/TRANS combination. The operands are widened to size_t
+	 * before multiplying so the product cannot overflow blasint.
+	 */
+	if ( *rows > *cols )
+		msize = (size_t)(*rows) * (size_t)(*ldb) * sizeof(FLOAT);
+	else
+		msize = (size_t)(*cols) * (size_t)(*ldb) * sizeof(FLOAT);
+
+	b = malloc(msize);
+	if ( b == NULL )
+	{
+		printf("Memory alloc failed\n");
+		exit(1);
+	}
+
+	if ( order == BlasColMajor )
+	{
+		if ( trans == BlasNoTrans )
+		{
+			OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb );
+			OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *ldb, a, *ldb );
+		}
+		else
+		{
+			OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb );
+			/* the transposed result held in b is cols x rows */
+			OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
+		}
+	}
+	else
+	{
+		if ( trans == BlasNoTrans )
+		{
+			OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb );
+			OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *ldb, a, *ldb );
+		}
+		else
+		{
+			OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb );
+			/* the transposed result held in b is cols x rows */
+			OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
+		}
+	}
+
+	free(b);
+	return;
+
+}
+
+
diff --git a/interface/zimatcopy.c b/interface/zimatcopy.c
new file mode 100644
index 000000000..90402d3c4
--- /dev/null
+++ b/interface/zimatcopy.c
@@ -0,0 +1,185 @@
+/***************************************************************************
+Copyright (c) 2014, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/***********************************************************
+ * 2014/06/10 Saar
+***********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#if defined(DOUBLE)
+#define ERROR_NAME "ZIMATCOPY"
+#else
+#define ERROR_NAME "CIMATCOPY"
+#endif
+
+#define BlasRowMajor 0
+#define BlasColMajor 1
+#define BlasNoTrans 0
+#define BlasTrans 1
+#define BlasTransConj 2
+#define BlasConj 3
+
+/***********************************************************
+ * In-place scaled copy / transpose of a complex matrix
+ * (CIMATCOPY, or ZIMATCOPY when DOUBLE is defined).
+ *
+ * ORDER       'C' = column major, 'R' = row major (case-insensitive).
+ * TRANS       'N' selects BlasNoTrans, 'T' BlasTrans,
+ *             'C' BlasTransConj and 'R' BlasConj.
+ * rows, cols  dimensions of the input matrix, both must be > 0.
+ * alpha       two FLOATs, passed to the kernel as alpha[0] and
+ *             alpha[1] (presumably real and imaginary part).
+ * a           matrix storage (two FLOATs per element), overwritten
+ *             with the result.
+ * lda         leading dimension of the input layout of a.
+ * ldb         leading dimension of the result layout of a.
+ *
+ * Invalid arguments are reported through xerbla and a is left
+ * untouched. The reported info is 1 (ORDER), 2 (TRANS), 3 (rows),
+ * 4 (cols), 7 (lda) or 9 (ldb); when several checks fail the
+ * lowest number wins because it is assigned last.
+ *
+ * The operation is done out of place: a kernel writes the scaled
+ * result into a scratch buffer, which is then copied back over a
+ * with leading dimension ldb. If the scratch buffer cannot be
+ * allocated a message is printed and the process exits.
+ *
+ * NOTE(review): the kernel argument order is assumed to be
+ * (rows, cols, alpha_r, alpha_i, src, ld_src, dst, ld_dst) -
+ * confirm against the OMATCOPY_K_* definitions.
+***********************************************************/
+void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
+{
+
+	char Order, Trans;
+	int order=-1,trans=-1;
+	blasint info = -1;
+	FLOAT *b;
+	size_t msize;
+
+	Order = *ORDER;
+	Trans = *TRANS;
+
+	TOUPPER(Order);
+	TOUPPER(Trans);
+
+	if ( Order == 'C' ) order = BlasColMajor;
+	if ( Order == 'R' ) order = BlasRowMajor;
+	if ( Trans == 'N' ) trans = BlasNoTrans;
+	if ( Trans == 'T' ) trans = BlasTrans;
+	if ( Trans == 'C' ) trans = BlasTransConj;
+	if ( Trans == 'R' ) trans = BlasConj;
+
+	/* ldb must cover the leading extent of the result layout */
+	if ( order == BlasColMajor)
+	{
+		if ( trans == BlasNoTrans      &&  *ldb < *rows ) info = 9;
+		if ( trans == BlasConj         &&  *ldb < *rows ) info = 9;
+		if ( trans == BlasTrans        &&  *ldb < *cols ) info = 9;
+		if ( trans == BlasTransConj    &&  *ldb < *cols ) info = 9;
+	}
+	if ( order == BlasRowMajor)
+	{
+		if ( trans == BlasNoTrans      &&  *ldb < *cols ) info = 9;
+		if ( trans == BlasConj         &&  *ldb < *cols ) info = 9;
+		if ( trans == BlasTrans        &&  *ldb < *rows ) info = 9;
+		if ( trans == BlasTransConj    &&  *ldb < *rows ) info = 9;
+	}
+
+	/* checked from last argument to first so the lowest info is kept */
+	if ( order == BlasColMajor &&  *lda < *rows ) info = 7;
+	if ( order == BlasRowMajor &&  *lda < *cols ) info = 7;
+	if ( *cols <= 0 ) info = 4;
+	if ( *rows <= 0 ) info = 3;
+	if ( trans < 0 )  info = 2;
+	if ( order < 0 )  info = 1;
+
+	if (info >= 0) {
+		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+		return;
+	}
+
+	/*
+	 * The result needs ldb elements per output column (column major)
+	 * or per output row (row major), and there are at most
+	 * max(rows, cols) of those; every element is two FLOATs. The
+	 * operands are widened to size_t before multiplying so the
+	 * product cannot overflow blasint.
+	 */
+	if ( *rows > *cols )
+		msize = (size_t)(*rows) * (size_t)(*ldb) * sizeof(FLOAT) * 2;
+	else
+		msize = (size_t)(*cols) * (size_t)(*ldb) * sizeof(FLOAT) * 2;
+
+	b = malloc(msize);
+	if ( b == NULL )
+	{
+		printf("Memory alloc failed\n");
+		exit(1);
+	}
+
+	/*
+	 * trans was validated above, so exactly one branch runs. The
+	 * copy-back always uses the plain (N) kernel with unit alpha; for
+	 * the transposed variants the result held in b is cols x rows.
+	 */
+	if ( order == BlasColMajor )
+	{
+		if ( trans == BlasNoTrans )
+		{
+			OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+		}
+		else if ( trans == BlasConj )
+		{
+			OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+		}
+		else if ( trans == BlasTrans )
+		{
+			OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+		}
+		else if ( trans == BlasTransConj )
+		{
+			OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+		}
+	}
+	else
+	{
+		if ( trans == BlasNoTrans )
+		{
+			OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+		}
+		else if ( trans == BlasConj )
+		{
+			OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+		}
+		else if ( trans == BlasTrans )
+		{
+			OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+		}
+		else if ( trans == BlasTransConj )
+		{
+			OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+		}
+	}
+
+	/* single release point: every path that allocated b ends here */
+	free(b);
+	return;
+
+}
+
+
|