summary | refs | log | tree | commit | diff
path: root/interface
diff options
context:
space:
mode:
author: Zhang Xianyi <traits.zhang@gmail.com> 2015-10-13 04:46:08 +0800
committer: Zhang Xianyi <traits.zhang@gmail.com> 2015-10-13 04:46:08 +0800
commit: 94b125255f35615a6ec2431651690ef1e5b174ac (patch)
tree: 0dcd732c6d2c50c61208a2cd62e5cf52b2cfe9e3 /interface
parent: 17ee2237c382e8ea3f9d3a8aa74aef4a1d12ff17 (diff)
parent: 3684706a121f9d9e1ccfc4a2bbb98f698eb04514 (diff)
download: openblas-94b125255f35615a6ec2431651690ef1e5b174ac.tar.gz
          openblas-94b125255f35615a6ec2431651690ef1e5b174ac.tar.bz2
          openblas-94b125255f35615a6ec2431651690ef1e5b174ac.zip
Merge branch 'develop' into cmake
Conflicts: driver/others/memory.c
Diffstat (limited to 'interface')
-rw-r--r--  interface/imatcopy.c   35
-rw-r--r--  interface/zimatcopy.c  50
2 files changed, 82 insertions, 3 deletions
diff --git a/interface/imatcopy.c b/interface/imatcopy.c
index 89f0ec823..f4309a85c 100644
--- a/interface/imatcopy.c
+++ b/interface/imatcopy.c
@@ -26,7 +26,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
/***********************************************************
- * 2014/06/10 Saar
+ * 2014-06-10 Saar
+ * 2015-09-07 grisuthedragon
***********************************************************/
#include <stdio.h>
@@ -50,6 +51,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#undef malloc
#undef free
+/* Enable the new IMATCOPY code path, which operates in place when lda == ldb. */
+#define NEW_IMATCOPY
+
#ifndef CBLAS
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
{
@@ -75,7 +79,6 @@ void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha,
#else
void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, FLOAT calpha, FLOAT *a, blasint clda, blasint cldb)
{
- char Order, Trans;
int order=-1,trans=-1;
blasint info = -1;
FLOAT *b;
@@ -117,6 +120,34 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
+#ifdef NEW_IMATCOPY
+ if ( *lda == *ldb ) {
+ if ( order == BlasColMajor )
+ {
+ if ( trans == BlasNoTrans )
+ {
+ IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda );
+ }
+ else
+ {
+ IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda );
+ }
+ }
+ else
+ {
+ if ( trans == BlasNoTrans )
+ {
+ IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda );
+ }
+ else
+ {
+ IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda );
+ }
+ }
+ return;
+ }
+
+#endif
if ( *lda > *ldb )
msize = (*lda) * (*ldb) * sizeof(FLOAT);
diff --git a/interface/zimatcopy.c b/interface/zimatcopy.c
index 3f273cf13..b1e1d15dc 100644
--- a/interface/zimatcopy.c
+++ b/interface/zimatcopy.c
@@ -26,7 +26,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
/***********************************************************
- * 2014/06/10 Saar
+ * 2014-06-10 Saar
+ * 2015-09-07 grisuthedragon
***********************************************************/
#include <stdio.h>
@@ -49,6 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define BlasTransConj 2
#define BlasConj 3
+#define NEW_IMATCOPY
#ifndef CBLAS
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
@@ -124,6 +126,52 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
return;
}
+#ifdef NEW_IMATCOPY
+ if (*lda == *ldb) {
+ if ( order == BlasColMajor )
+ {
+
+ if ( trans == BlasNoTrans )
+ {
+ IMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda );
+ }
+ if ( trans == BlasConj )
+ {
+ IMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
+ }
+ if ( trans == BlasTrans )
+ {
+ IMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda );
+ }
+ if ( trans == BlasTransConj )
+ {
+ IMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
+ }
+ }
+ else
+ {
+
+ if ( trans == BlasNoTrans )
+ {
+ IMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda );
+ }
+ if ( trans == BlasConj )
+ {
+ IMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
+ }
+ if ( trans == BlasTrans )
+ {
+ IMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda );
+ }
+ if ( trans == BlasTransConj )
+ {
+ IMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
+ }
+ }
+ return;
+ }
+#endif
+
if ( *lda > *ldb )
msize = (*lda) * (*ldb) * sizeof(FLOAT) * 2;
else