diff options
author | eugene.chereshnev <eugenechereshnev@gmail.com> | 2017-01-31 16:44:06 -0800 |
---|---|---|
committer | eugene.chereshnev <eugenechereshnev@gmail.com> | 2017-02-03 18:26:07 -0800 |
commit | 65d313cf888fc288ab35ba2b3c665d008bbe06e7 (patch) | |
tree | e3740d7d384f27c591db4afa1d26bd0a67945644 | |
parent | 7871b1e54dcfe2b2ceabdf5004f57215261561c1 (diff) | |
download | lapack-65d313cf888fc288ab35ba2b3c665d008bbe06e7.tar.gz lapack-65d313cf888fc288ab35ba2b3c665d008bbe06e7.tar.bz2 lapack-65d313cf888fc288ab35ba2b3c665d008bbe06e7.zip |
lapacke_*(bb|or|un)(csd|bdb): forward calls to LAPACK without conversion
1) Ignore TRANS parameter for LAPACKE since it has the same meaning
as matrix_layout parameter.
TRANS = 'T' means matrices are handled in row-major format.
TRANS != 'T' means matrices are handled in col-major format.
2) So conversion from/to row-major layout can be removed in LAPACKE.
All the cases can be forwarded to LAPACK calls
with corresponding TRANS parameter.
3) nrows_* variables can be safely removed. NaN checkers for
different TRANS values can be called with corresponding layout.
LAPACKE calls are forwarded in the following way:
trans != 'T', col-major: call LAPACK(trans = 'N') - col-major
trans != 'T', row-major: call LAPACK(trans = 'T') - row-major
trans = 'T', col-major: call LAPACK(trans = 'T') - row-major
trans = 'T', row-major: call LAPACK(trans = 'T') - row-major
24 files changed, 404 insertions, 1926 deletions
diff --git a/LAPACKE/src/lapacke_cbbcsd.c b/LAPACKE/src/lapacke_cbbcsd.c index 5fba06d9..a881e659 100644 --- a/LAPACKE/src/lapacke_cbbcsd.c +++ b/LAPACKE/src/lapacke_cbbcsd.c @@ -47,17 +47,18 @@ lapack_int LAPACKE_cbbcsd( int matrix_layout, char jobu1, char jobu2, lapack_int lrwork = -1; float* rwork = NULL; float rwork_query; - lapack_int nrows_u1, nrows_u2, nrows_v1t, nrows_v2t; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cbbcsd", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); if( LAPACKE_s_nancheck( q-1, phi, 1 ) ) { return -11; } @@ -65,22 +66,22 @@ lapack_int LAPACKE_cbbcsd( int matrix_layout, char jobu1, char jobu2, return -10; } if( LAPACKE_lsame( jobu1, 'y' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_u1, p, u1, ldu1 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, p, p, u1, ldu1 ) ) { return -12; } } if( LAPACKE_lsame( jobu2, 'y' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_u2, m-p, u2, ldu2 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, m-p, m-p, u2, ldu2 ) ) { return -14; } } if( LAPACKE_lsame( jobv1t, 'y' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v1t, q, v1t, ldv1t ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, q, q, v1t, ldv1t ) ) { return -16; } } if( LAPACKE_lsame( jobv2t, 'y' ) ) { - if( LAPACKE_cge_nancheck( matrix_layout, nrows_v2t, m-q, v2t, ldv2t ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, m-q, m-q, v2t, ldv2t ) ) { return -18; } } diff --git a/LAPACKE/src/lapacke_cbbcsd_work.c b/LAPACKE/src/lapacke_cbbcsd_work.c index 31ad6b55..f8bf17c2 100644 --- a/LAPACKE/src/lapacke_cbbcsd_work.c +++ b/LAPACKE/src/lapacke_cbbcsd_work.c @@ -47,156 +47,36 @@ lapack_int LAPACKE_cbbcsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int lrwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_cbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, + LAPACK_cbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &m, &p, &q, theta, phi, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, b11d, b11e, b12d, b12e, b21d, b21e, b22d, b22e, rwork, &lrwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_complex_float* u1_t = NULL; - lapack_complex_float* u2_t = NULL; - lapack_complex_float* v1t_t = NULL; - lapack_complex_float* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -13; - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -15; - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -17; - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -19; - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lrwork == -1 ) { - LAPACK_cbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1, &ldu1_t, u2, &ldu2_t, v1t, &ldv1t_t, - v2t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, b21e, - b22d, b22e, rwork, &lrwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - } - /* Transpose input matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_cge_trans( matrix_layout, nrows_u1, p, u1, ldu1, u1_t, - ldu1_t ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_cge_trans( matrix_layout, nrows_u2, m-p, u2, ldu2, u2_t, - ldu2_t ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_cge_trans( matrix_layout, nrows_v1t, q, v1t, ldv1t, v1t_t, - ldv1t_t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_cge_trans( matrix_layout, nrows_v2t, m-q, v2t, ldv2t, v2t_t, - ldv2t_t ); - } - /* Call LAPACK function and adjust info */ - LAPACK_cbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1_t, &ldu1_t, u2_t, &ldu2_t, v1t_t, - &ldv1t_t, v2t_t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, - b21e, b22d, b22e, rwork, &lrwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_3: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_2: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_1: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_cbbcsd_work", info ); diff --git a/LAPACKE/src/lapacke_cunbdb.c b/LAPACKE/src/lapacke_cunbdb.c index 44f97db1..17abbb98 100644 --- a/LAPACKE/src/lapacke_cunbdb.c +++ b/LAPACKE/src/lapacke_cunbdb.c @@ -49,27 +49,28 @@ lapack_int LAPACKE_cunbdb( int matrix_layout, char trans, char signs, lapack_int lwork = -1; lapack_complex_float* work = NULL; lapack_complex_float work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cunbdb", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { return -7; } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { return -9; } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { return -11; } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { return -13; } #endif diff --git a/LAPACKE/src/lapacke_cunbdb_work.c b/LAPACKE/src/lapacke_cunbdb_work.c index 7f0c47f8..066ce345 100644 --- a/LAPACKE/src/lapacke_cunbdb_work.c +++ b/LAPACKE/src/lapacke_cunbdb_work.c @@ -47,119 +47,35 @@ lapack_int LAPACKE_cunbdb_work( int matrix_layout, char trans, char signs, lapack_complex_float* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_cunbdb( &trans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, + LAPACK_cunbdb( <rans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, theta, phi, taup1, taup2, tauq1, tauq2, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - lapack_complex_float* x11_t = NULL; - lapack_complex_float* x12_t = NULL; - lapack_complex_float* x21_t = NULL; - lapack_complex_float* x22_t = NULL; - /* Check leading dimension(s) */ - if( ldx11 < q ) { - info = -8; - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -10; - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - return info; - } - if( ldx21 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_cunbdb( &trans, &signs, &m, &p, &q, x11, &ldx11_t, x12, - &ldx12_t, x21, &ldx21_t, x22, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - x22_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); - /* Call LAPACK function and adjust info */ - LAPACK_cunbdb( &trans, &signs, &m, &p, &q, x11_t, &ldx11_t, x12_t, - &ldx12_t, x21_t, &ldx21_t, x22_t, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - /* Release memory and exit */ - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_cunbdb_work", info ); diff --git a/LAPACKE/src/lapacke_cuncsd.c b/LAPACKE/src/lapacke_cuncsd.c index 76f92b70..6173d1b0 100644 --- a/LAPACKE/src/lapacke_cuncsd.c +++ b/LAPACKE/src/lapacke_cuncsd.c @@ -54,27 +54,28 @@ lapack_int LAPACKE_cuncsd( int matrix_layout, char jobu1, char jobu2, lapack_complex_float* work = NULL; float rwork_query; lapack_complex_float work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cuncsd", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { return -11; } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { return -13; } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { return -15; } - if( LAPACKE_cge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { + if( LAPACKE_cge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { return -17; } #endif diff --git a/LAPACKE/src/lapacke_cuncsd_work.c b/LAPACKE/src/lapacke_cuncsd_work.c index 60fbd9ab..0bc63764 100644 --- a/LAPACKE/src/lapacke_cuncsd_work.c +++ b/LAPACKE/src/lapacke_cuncsd_work.c @@ -50,224 +50,36 @@ lapack_int LAPACKE_cuncsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ - LAPACK_cuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, - theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, - work, &lwork, rwork, &lrwork, iwork, &info ); - if( info < 0 ) { - info = info - 1; - } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - lapack_complex_float* x11_t = NULL; - lapack_complex_float* x12_t = NULL; - lapack_complex_float* x21_t = NULL; - lapack_complex_float* x22_t = NULL; - lapack_complex_float* u1_t = NULL; - lapack_complex_float* u2_t = NULL; - lapack_complex_float* v1t_t = NULL; - lapack_complex_float* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -21; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -23; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -25; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -27; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldx11 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldx21 < q ) { - info = -16; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -18; - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lrwork == -1 || lwork == -1 ) { - LAPACK_cuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, - &p, &q, x11, &ldx11_t, x12, &ldx12_t, x21, &ldx21_t, - x22, &ldx22_t, theta, u1, &ldu1_t, u2, &ldu2_t, v1t, - &ldv1t_t, v2t, &ldv2t_t, work, &lwork, rwork, - &lrwork, iwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; } - x22_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_4; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_5; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_6; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * - ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_7; - } - } - /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_cge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); /* Call LAPACK function and adjust info */ - LAPACK_cuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11_t, &ldx11_t, x12_t, &ldx12_t, x21_t, &ldx21_t, - x22_t, &ldx22_t, theta, u1_t, &ldu1_t, u2_t, &ldu2_t, - v1t_t, &ldv1t_t, v2t_t, &ldv2t_t, work, &lwork, rwork, - &lrwork, iwork, &info ); + LAPACK_cuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &signs, &m, + &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, + &ldx22, theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, + &ldv2t, work, &lwork, rwork, &lrwork, iwork, &info ); if( info < 0 ) { info = info - 1; } - /* Transpose output matrices */ - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_7: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_6: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_5: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_4: - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_cuncsd_work", info ); diff --git a/LAPACKE/src/lapacke_dbbcsd.c b/LAPACKE/src/lapacke_dbbcsd.c index 23cb7cc4..2bece7cb 100644 --- a/LAPACKE/src/lapacke_dbbcsd.c +++ b/LAPACKE/src/lapacke_dbbcsd.c @@ -47,17 +47,18 @@ lapack_int LAPACKE_dbbcsd( int matrix_layout, char jobu1, char jobu2, lapack_int lwork = -1; double* work = NULL; double work_query; - lapack_int nrows_u1, nrows_u2, nrows_v1t, nrows_v2t; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dbbcsd", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); if( LAPACKE_d_nancheck( q-1, phi, 1 ) ) { return -11; } @@ -65,22 +66,22 @@ lapack_int LAPACKE_dbbcsd( int matrix_layout, char jobu1, char jobu2, return -10; } if( LAPACKE_lsame( jobu1, 'y' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_u1, p, u1, ldu1 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, p, p, u1, ldu1 ) ) { return -12; } } if( LAPACKE_lsame( jobu2, 'y' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_u2, m-p, u2, ldu2 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, m-p, m-p, u2, ldu2 ) ) { return -14; } } if( LAPACKE_lsame( jobv1t, 'y' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v1t, q, v1t, ldv1t ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, q, q, v1t, ldv1t ) ) { return -16; } } if( LAPACKE_lsame( jobv2t, 'y' ) ) { - if( LAPACKE_dge_nancheck( matrix_layout, nrows_v2t, m-q, v2t, ldv2t ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, m-q, m-q, v2t, ldv2t ) ) { return -18; } } diff --git a/LAPACKE/src/lapacke_dbbcsd_work.c b/LAPACKE/src/lapacke_dbbcsd_work.c index c4f072a4..7987ba38 100644 --- a/LAPACKE/src/lapacke_dbbcsd_work.c +++ b/LAPACKE/src/lapacke_dbbcsd_work.c @@ -45,152 +45,36 @@ lapack_int LAPACKE_dbbcsd_work( int matrix_layout, char jobu1, char jobu2, double* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_dbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, + LAPACK_dbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &m, &p, &q, theta, phi, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, b11d, b11e, b12d, b12e, b21d, b21e, b22d, b22e, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - double* u1_t = NULL; - double* u2_t = NULL; - double* v1t_t = NULL; - double* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -13; - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -15; - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -17; - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -19; - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_dbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1, &ldu1_t, u2, &ldu2_t, v1t, &ldv1t_t, - v2t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, b21e, - b22d, b22e, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - } - /* Transpose input matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_dge_trans( matrix_layout, nrows_u1, p, u1, ldu1, u1_t, - ldu1_t ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_dge_trans( matrix_layout, nrows_u2, m-p, u2, ldu2, u2_t, - ldu2_t ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_dge_trans( matrix_layout, nrows_v1t, q, v1t, ldv1t, v1t_t, - ldv1t_t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_dge_trans( matrix_layout, nrows_v2t, m-q, v2t, ldv2t, v2t_t, - ldv2t_t ); - } - /* Call LAPACK function and adjust info */ - LAPACK_dbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1_t, &ldu1_t, u2_t, &ldu2_t, v1t_t, - &ldv1t_t, v2t_t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, - b21e, b22d, b22e, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_3: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_2: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_1: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_dbbcsd_work", info ); diff --git a/LAPACKE/src/lapacke_dorbdb.c b/LAPACKE/src/lapacke_dorbdb.c index a32feb5f..fd2c8180 100644 --- a/LAPACKE/src/lapacke_dorbdb.c +++ b/LAPACKE/src/lapacke_dorbdb.c @@ -45,27 +45,28 @@ lapack_int LAPACKE_dorbdb( int matrix_layout, char trans, char signs, lapack_int lwork = -1; double* work = NULL; double work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dorbdb", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { return -7; } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { return -9; } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { return -11; } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { return -13; } #endif diff --git a/LAPACKE/src/lapacke_dorbdb_work.c b/LAPACKE/src/lapacke_dorbdb_work.c index c33cc8f8..7d5ecf8d 100644 --- a/LAPACKE/src/lapacke_dorbdb_work.c +++ b/LAPACKE/src/lapacke_dorbdb_work.c @@ -43,115 +43,35 @@ lapack_int LAPACKE_dorbdb_work( int matrix_layout, char trans, char signs, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_dorbdb( &trans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, + LAPACK_dorbdb( <rans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, theta, phi, taup1, taup2, tauq1, tauq2, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - double* x11_t = NULL; - double* x12_t = NULL; - double* x21_t = NULL; - double* x22_t = NULL; - /* Check leading dimension(s) */ - if( ldx11 < q ) { - info = -8; - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -10; - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - return info; - } - if( ldx21 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_dorbdb( &trans, &signs, &m, &p, &q, x11, &ldx11_t, x12, - &ldx12_t, x21, &ldx21_t, x22, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (double*)LAPACKE_malloc( sizeof(double) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (double*) - LAPACKE_malloc( sizeof(double) * ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (double*)LAPACKE_malloc( sizeof(double) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - x22_t = (double*) - LAPACKE_malloc( sizeof(double) * ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); - /* Call LAPACK function and adjust info */ - LAPACK_dorbdb( &trans, &signs, &m, &p, &q, x11_t, &ldx11_t, x12_t, - &ldx12_t, x21_t, &ldx21_t, x22_t, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - /* Release memory and exit */ - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_dorbdb_work", info ); diff --git a/LAPACKE/src/lapacke_dorcsd.c b/LAPACKE/src/lapacke_dorcsd.c index a240ef13..4c4330a2 100644 --- a/LAPACKE/src/lapacke_dorcsd.c +++ b/LAPACKE/src/lapacke_dorcsd.c @@ -48,27 +48,28 @@ lapack_int LAPACKE_dorcsd( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork = NULL; double* work = NULL; double work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dorcsd", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { return -11; } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { return -13; } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { return -15; } - if( LAPACKE_dge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { + if( LAPACKE_dge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { return -17; } #endif diff --git a/LAPACKE/src/lapacke_dorcsd_work.c b/LAPACKE/src/lapacke_dorcsd_work.c index 44e3c930..7c23755d 100644 --- a/LAPACKE/src/lapacke_dorcsd_work.c +++ b/LAPACKE/src/lapacke_dorcsd_work.c @@ -46,216 +46,36 @@ lapack_int LAPACKE_dorcsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ - LAPACK_dorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, - theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, - work, &lwork, iwork, &info ); - if( info < 0 ) { - info = info - 1; - } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - double* x11_t = NULL; - double* x12_t = NULL; - double* x21_t = NULL; - double* x22_t = NULL; - double* u1_t = NULL; - double* u2_t = NULL; - double* v1t_t = NULL; - double* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -21; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -23; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -25; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -27; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldx11 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldx21 < q ) { - info = -16; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -18; - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_dorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, - &p, &q, x11, &ldx11_t, x12, &ldx12_t, x21, &ldx21_t, - x22, &ldx22_t, theta, u1, &ldu1_t, u2, &ldu2_t, v1t, - &ldv1t_t, v2t, &ldv2t_t, work, &lwork, iwork, - &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (double*)LAPACKE_malloc( sizeof(double) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (double*) - LAPACKE_malloc( sizeof(double) * ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (double*)LAPACKE_malloc( sizeof(double) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; } - x22_t = (double*) - LAPACKE_malloc( sizeof(double) * ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_4; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_5; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_6; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_7; - } - } - /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_dge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); /* Call LAPACK function and adjust info */ - LAPACK_dorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11_t, &ldx11_t, x12_t, &ldx12_t, x21_t, &ldx21_t, - x22_t, &ldx22_t, theta, u1_t, &ldu1_t, u2_t, &ldu2_t, - v1t_t, &ldv1t_t, v2t_t, &ldv2t_t, work, &lwork, iwork, - &info ); + LAPACK_dorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &signs, &m, + &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, + &ldx22, theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, + &ldv2t, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; } - /* Transpose output matrices */ - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_7: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_6: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_5: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_4: - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_dorcsd_work", info ); diff --git a/LAPACKE/src/lapacke_sbbcsd.c b/LAPACKE/src/lapacke_sbbcsd.c index 05dbea8e..04464e61 100644 --- a/LAPACKE/src/lapacke_sbbcsd.c +++ b/LAPACKE/src/lapacke_sbbcsd.c @@ -46,17 +46,18 @@ lapack_int LAPACKE_sbbcsd( int matrix_layout, char jobu1, char jobu2, lapack_int lwork = -1; float* work = NULL; float work_query; - lapack_int nrows_u1, nrows_u2, nrows_v1t, nrows_v2t; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_sbbcsd", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); if( LAPACKE_s_nancheck( q-1, phi, 1 ) ) { return -11; } @@ -64,22 +65,22 @@ lapack_int LAPACKE_sbbcsd( int matrix_layout, char jobu1, char jobu2, return -10; } if( LAPACKE_lsame( jobu1, 'y' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_u1, p, u1, ldu1 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, p, p, u1, ldu1 ) ) { return -12; } } if( LAPACKE_lsame( jobu2, 'y' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_u2, m-p, u2, ldu2 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, m-p, m-p, u2, ldu2 ) ) { return -14; } } if( LAPACKE_lsame( jobv1t, 'y' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v1t, q, v1t, ldv1t ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, q, q, v1t, ldv1t ) ) { return -16; } } if( LAPACKE_lsame( jobv2t, 'y' ) ) { - if( LAPACKE_sge_nancheck( matrix_layout, nrows_v2t, m-q, v2t, ldv2t ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, m-q, m-q, v2t, ldv2t ) ) { return -18; } } diff --git a/LAPACKE/src/lapacke_sbbcsd_work.c b/LAPACKE/src/lapacke_sbbcsd_work.c index 9d840be1..2866d6fc 100644 --- a/LAPACKE/src/lapacke_sbbcsd_work.c +++ b/LAPACKE/src/lapacke_sbbcsd_work.c @@ -45,151 +45,36 @@ lapack_int LAPACKE_sbbcsd_work( int matrix_layout, char jobu1, char jobu2, float* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_sbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, + LAPACK_sbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &m, &p, &q, theta, phi, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, b11d, b11e, b12d, b12e, b21d, b21e, b22d, b22e, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - float* u1_t = NULL; - float* u2_t = NULL; - float* v1t_t = NULL; - float* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -13; - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -15; - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -17; - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -19; - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_sbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1, &ldu1_t, u2, &ldu2_t, v1t, &ldv1t_t, - v2t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, b21e, - b22d, b22e, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (float*)LAPACKE_malloc( sizeof(float) * ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (float*) - LAPACKE_malloc( sizeof(float) * ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - } - /* Transpose input matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_sge_trans( matrix_layout, nrows_u1, p, u1, ldu1, u1_t, - ldu1_t ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_sge_trans( matrix_layout, nrows_u2, m-p, u2, ldu2, u2_t, - ldu2_t ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_sge_trans( matrix_layout, nrows_v1t, q, v1t, ldv1t, v1t_t, - ldv1t_t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_sge_trans( matrix_layout, nrows_v2t, m-q, v2t, ldv2t, v2t_t, - ldv2t_t ); - } - /* Call LAPACK function and adjust info */ - LAPACK_sbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1_t, &ldu1_t, u2_t, &ldu2_t, v1t_t, - &ldv1t_t, v2t_t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, - b21e, b22d, b22e, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_3: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_2: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_1: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_sbbcsd_work", info ); diff --git a/LAPACKE/src/lapacke_sorbdb.c b/LAPACKE/src/lapacke_sorbdb.c index 1a0ede9a..8bb6c0b2 100644 --- a/LAPACKE/src/lapacke_sorbdb.c +++ b/LAPACKE/src/lapacke_sorbdb.c @@ -45,27 +45,28 @@ lapack_int LAPACKE_sorbdb( int matrix_layout, char trans, char signs, lapack_int lwork = -1; float* work = NULL; float work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_sorbdb", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { return -7; } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { return -9; } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { return -11; } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { return -13; } #endif diff --git a/LAPACKE/src/lapacke_sorbdb_work.c b/LAPACKE/src/lapacke_sorbdb_work.c index 54408201..dbb98636 100644 --- a/LAPACKE/src/lapacke_sorbdb_work.c +++ b/LAPACKE/src/lapacke_sorbdb_work.c @@ -43,113 +43,35 @@ lapack_int LAPACKE_sorbdb_work( int matrix_layout, char trans, char signs, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_sorbdb( &trans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, + LAPACK_sorbdb( <rans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, theta, phi, taup1, taup2, tauq1, tauq2, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - float* x11_t = NULL; - float* x12_t = NULL; - float* x21_t = NULL; - float* x22_t = NULL; - /* Check leading dimension(s) */ - if( ldx11 < q ) { - info = -8; - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -10; - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - return info; - } - if( ldx21 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_sorbdb( &trans, &signs, &m, &p, &q, x11, &ldx11_t, x12, - &ldx12_t, x21, &ldx21_t, x22, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (float*)LAPACKE_malloc( sizeof(float) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (float*)LAPACKE_malloc( sizeof(float) * ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (float*)LAPACKE_malloc( sizeof(float) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - x22_t = (float*)LAPACKE_malloc( sizeof(float) * ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); - /* Call LAPACK function and adjust info */ - LAPACK_sorbdb( &trans, &signs, &m, &p, &q, x11_t, &ldx11_t, x12_t, - &ldx12_t, x21_t, &ldx21_t, x22_t, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - /* Release memory and exit */ - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_sorbdb_work", info ); diff --git a/LAPACKE/src/lapacke_sorcsd.c b/LAPACKE/src/lapacke_sorcsd.c index b9864aef..7cb77ea4 100644 --- a/LAPACKE/src/lapacke_sorcsd.c +++ b/LAPACKE/src/lapacke_sorcsd.c @@ -48,27 +48,28 @@ lapack_int LAPACKE_sorcsd( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork = NULL; float* work = NULL; float work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_sorcsd", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { return -11; } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { return -13; } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { return -15; } - if( LAPACKE_sge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { + if( LAPACKE_sge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { return -17; } #endif diff --git a/LAPACKE/src/lapacke_sorcsd_work.c b/LAPACKE/src/lapacke_sorcsd_work.c index feff8fab..3322607c 100644 --- a/LAPACKE/src/lapacke_sorcsd_work.c +++ b/LAPACKE/src/lapacke_sorcsd_work.c @@ -46,213 +46,36 @@ lapack_int LAPACKE_sorcsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ - LAPACK_sorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, - theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, - work, &lwork, iwork, &info ); - if( info < 0 ) { - info = info - 1; - } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - float* x11_t = NULL; - float* x12_t = NULL; - float* x21_t = NULL; - float* x22_t = NULL; - float* u1_t = NULL; - float* u2_t = NULL; - float* v1t_t = NULL; - float* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -21; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -23; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -25; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -27; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldx11 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldx21 < q ) { - info = -16; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -18; - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_sorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, - &p, &q, x11, &ldx11_t, x12, &ldx12_t, x21, &ldx21_t, - x22, &ldx22_t, theta, u1, &ldu1_t, u2, &ldu2_t, v1t, - &ldv1t_t, v2t, &ldv2t_t, work, &lwork, iwork, - &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (float*)LAPACKE_malloc( sizeof(float) * ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (float*)LAPACKE_malloc( sizeof(float) * ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (float*)LAPACKE_malloc( sizeof(float) * ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; } - x22_t = (float*)LAPACKE_malloc( sizeof(float) * ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (float*)LAPACKE_malloc( sizeof(float) * ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_4; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (float*) - LAPACKE_malloc( sizeof(float) * ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_5; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_6; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_7; - } - } - /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_sge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); /* Call LAPACK function and adjust info */ - LAPACK_sorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11_t, &ldx11_t, x12_t, &ldx12_t, x21_t, &ldx21_t, - x22_t, &ldx22_t, theta, u1_t, &ldu1_t, u2_t, &ldu2_t, - v1t_t, &ldv1t_t, v2t_t, &ldv2t_t, work, &lwork, iwork, - &info ); + LAPACK_sorcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &signs, &m, + &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, + &ldx22, theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, + &ldv2t, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; } - /* Transpose output matrices */ - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_7: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_6: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_5: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_4: - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_sorcsd_work", info ); diff --git a/LAPACKE/src/lapacke_zbbcsd.c b/LAPACKE/src/lapacke_zbbcsd.c index b88b9a85..6b09dd44 100644 --- a/LAPACKE/src/lapacke_zbbcsd.c +++ b/LAPACKE/src/lapacke_zbbcsd.c @@ -48,17 +48,18 @@ lapack_int LAPACKE_zbbcsd( int matrix_layout, char jobu1, char jobu2, lapack_int lrwork = -1; double* rwork = NULL; double rwork_query; - lapack_int nrows_u1, nrows_u2, nrows_v1t, nrows_v2t; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zbbcsd", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); if( LAPACKE_d_nancheck( q-1, phi, 1 ) ) { return -11; } @@ -66,22 +67,22 @@ lapack_int LAPACKE_zbbcsd( int matrix_layout, char jobu1, char jobu2, return -10; } if( LAPACKE_lsame( jobu1, 'y' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_u1, p, u1, ldu1 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, p, p, u1, ldu1 ) ) { return -12; } } if( LAPACKE_lsame( jobu2, 'y' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_u2, m-p, u2, ldu2 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, m-p, m-p, u2, ldu2 ) ) { return -14; } } if( LAPACKE_lsame( jobv1t, 'y' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v1t, q, v1t, ldv1t ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, q, q, v1t, ldv1t ) ) { return -16; } } if( LAPACKE_lsame( jobv2t, 'y' ) ) { - if( LAPACKE_zge_nancheck( matrix_layout, nrows_v2t, m-q, v2t, ldv2t ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, m-q, m-q, v2t, ldv2t ) ) { return -18; } } diff --git a/LAPACKE/src/lapacke_zbbcsd_work.c b/LAPACKE/src/lapacke_zbbcsd_work.c index 34882ccd..1badab3d 100644 --- a/LAPACKE/src/lapacke_zbbcsd_work.c +++ b/LAPACKE/src/lapacke_zbbcsd_work.c @@ -47,156 +47,36 @@ lapack_int LAPACKE_zbbcsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int lrwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_zbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, + LAPACK_zbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &m, &p, &q, theta, phi, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, b11d, b11e, b12d, b12e, b21d, b21e, b22d, b22e, rwork, &lrwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_complex_double* u1_t = NULL; - lapack_complex_double* u2_t = NULL; - lapack_complex_double* v1t_t = NULL; - lapack_complex_double* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -13; - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -15; - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -17; - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -19; - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lrwork == -1 ) { - LAPACK_zbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1, &ldu1_t, u2, &ldu2_t, v1t, &ldv1t_t, - v2t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, b21e, - b22d, b22e, rwork, &lrwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - } - /* Transpose input matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_zge_trans( matrix_layout, nrows_u1, p, u1, ldu1, u1_t, - ldu1_t ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_zge_trans( matrix_layout, nrows_u2, m-p, u2, ldu2, u2_t, - ldu2_t ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_zge_trans( matrix_layout, nrows_v1t, q, v1t, ldv1t, v1t_t, - ldv1t_t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_zge_trans( matrix_layout, nrows_v2t, m-q, v2t, ldv2t, v2t_t, - ldv2t_t ); - } - /* Call LAPACK function and adjust info */ - LAPACK_zbbcsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, - theta, phi, u1_t, &ldu1_t, u2_t, &ldu2_t, v1t_t, - &ldv1t_t, v2t_t, &ldv2t_t, b11d, b11e, b12d, b12e, b21d, - b21e, b22d, b22e, rwork, &lrwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_3: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_2: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_1: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_zbbcsd_work", info ); diff --git a/LAPACKE/src/lapacke_zunbdb.c b/LAPACKE/src/lapacke_zunbdb.c index 1aad69f1..1cf0b038 100644 --- a/LAPACKE/src/lapacke_zunbdb.c +++ b/LAPACKE/src/lapacke_zunbdb.c @@ -49,27 +49,28 @@ lapack_int LAPACKE_zunbdb( int matrix_layout, char trans, char signs, lapack_int lwork = -1; lapack_complex_double* work = NULL; lapack_complex_double work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zunbdb", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { return -7; } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { return -9; } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { return -11; } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { return -13; } #endif diff --git a/LAPACKE/src/lapacke_zunbdb_work.c b/LAPACKE/src/lapacke_zunbdb_work.c index 689434a5..5221b1f7 100644 --- a/LAPACKE/src/lapacke_zunbdb_work.c +++ b/LAPACKE/src/lapacke_zunbdb_work.c @@ -47,121 +47,35 @@ lapack_int LAPACKE_zunbdb_work( int matrix_layout, char trans, char signs, lapack_complex_double* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; + } /* Call LAPACK function and adjust info */ - LAPACK_zunbdb( &trans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, + LAPACK_zunbdb( <rans, &signs, &m, &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, theta, phi, taup1, taup2, tauq1, tauq2, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - lapack_complex_double* x11_t = NULL; - lapack_complex_double* x12_t = NULL; - lapack_complex_double* x21_t = NULL; - lapack_complex_double* x22_t = NULL; - /* Check leading dimension(s) */ - if( ldx11 < q ) { - info = -8; - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -10; - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - return info; - } - if( ldx21 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { - LAPACK_zunbdb( &trans, &signs, &m, &p, &q, x11, &ldx11_t, x12, - &ldx12_t, x21, &ldx21_t, x22, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; - } - x22_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); - /* Call LAPACK function and adjust info */ - LAPACK_zunbdb( &trans, &signs, &m, &p, &q, x11_t, &ldx11_t, x12_t, - &ldx12_t, x21_t, &ldx21_t, x22_t, &ldx22_t, theta, phi, - taup1, taup2, tauq1, tauq2, work, &lwork, &info ); - if( info < 0 ) { - info = info - 1; - } - /* Transpose output matrices */ - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - /* Release memory and exit */ - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_zunbdb_work", info ); diff --git a/LAPACKE/src/lapacke_zuncsd.c b/LAPACKE/src/lapacke_zuncsd.c index 384b3e09..d84c68af 100644 --- a/LAPACKE/src/lapacke_zuncsd.c +++ b/LAPACKE/src/lapacke_zuncsd.c @@ -54,27 +54,28 @@ lapack_int LAPACKE_zuncsd( int matrix_layout, char jobu1, char jobu2, lapack_complex_double* work = NULL; double rwork_query; lapack_complex_double work_query; - lapack_int nrows_x11, nrows_x12, nrows_x21, nrows_x22; + int lapack_layout; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zuncsd", -1 ); return -1; } + if( LAPACKE_lsame( trans, 'n' ) && matrix_layout == LAPACK_COL_MAJOR ) { + lapack_layout = LAPACK_COL_MAJOR; + } else { + lapack_layout = LAPACK_ROW_MAJOR; + } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ - nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x11, q, x11, ldx11 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, p, q, x11, ldx11 ) ) { return -11; } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x12, m-q, x12, ldx12 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, p, m-q, x12, ldx12 ) ) { return -13; } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x21, q, x21, ldx21 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, m-p, q, x21, ldx21 ) ) { return -15; } - if( LAPACKE_zge_nancheck( matrix_layout, nrows_x22, m-q, x22, ldx22 ) ) { + if( LAPACKE_zge_nancheck( lapack_layout, m-p, m-q, x22, ldx22 ) ) { return -17; } #endif diff --git a/LAPACKE/src/lapacke_zuncsd_work.c b/LAPACKE/src/lapacke_zuncsd_work.c index 7c3b3950..8d6ce563 100644 --- a/LAPACKE/src/lapacke_zuncsd_work.c +++ b/LAPACKE/src/lapacke_zuncsd_work.c @@ -50,226 +50,36 @@ lapack_int LAPACKE_zuncsd_work( int matrix_layout, char jobu1, char jobu2, lapack_int* iwork ) { lapack_int info = 0; - if( matrix_layout == LAPACK_COL_MAJOR ) { - /* Call LAPACK function and adjust info */ - LAPACK_zuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, &ldx22, - theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, &ldv2t, - work, &lwork, rwork, &lrwork, iwork, &info ); - if( info < 0 ) { - info = info - 1; - } - } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_x11 = ( LAPACKE_lsame( trans, 'n' ) ? p : q); - lapack_int nrows_x12 = ( LAPACKE_lsame( trans, 'n' ) ? p : m-q); - lapack_int nrows_x21 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : q); - lapack_int nrows_x22 = ( LAPACKE_lsame( trans, 'n' ) ? m-p : m-q); - lapack_int nrows_u1 = ( LAPACKE_lsame( jobu1, 'y' ) ? p : 1); - lapack_int nrows_u2 = ( LAPACKE_lsame( jobu2, 'y' ) ? m-p : 1); - lapack_int nrows_v1t = ( LAPACKE_lsame( jobv1t, 'y' ) ? q : 1); - lapack_int nrows_v2t = ( LAPACKE_lsame( jobv2t, 'y' ) ? m-q : 1); - lapack_int ldu1_t = MAX(1,nrows_u1); - lapack_int ldu2_t = MAX(1,nrows_u2); - lapack_int ldv1t_t = MAX(1,nrows_v1t); - lapack_int ldv2t_t = MAX(1,nrows_v2t); - lapack_int ldx11_t = MAX(1,nrows_x11); - lapack_int ldx12_t = MAX(1,nrows_x12); - lapack_int ldx21_t = MAX(1,nrows_x21); - lapack_int ldx22_t = MAX(1,nrows_x22); - lapack_complex_double* x11_t = NULL; - lapack_complex_double* x12_t = NULL; - lapack_complex_double* x21_t = NULL; - lapack_complex_double* x22_t = NULL; - lapack_complex_double* u1_t = NULL; - lapack_complex_double* u2_t = NULL; - lapack_complex_double* v1t_t = NULL; - lapack_complex_double* v2t_t = NULL; - /* Check leading dimension(s) */ - if( ldu1 < p ) { - info = -21; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldu2 < m-p ) { - info = -23; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldv1t < q ) { - info = -25; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldv2t < m-q ) { - info = -27; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldx11 < q ) { - info = -12; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldx12 < m-q ) { - info = -14; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldx21 < q ) { - info = -16; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - if( ldx22 < m-q ) { - info = -18; - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - return info; - } - /* Query optimal working array(s) size if requested */ - if( lrwork == -1 || lwork == -1 ) { - LAPACK_zuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, - &p, &q, x11, &ldx11_t, x12, &ldx12_t, x21, &ldx21_t, - x22, &ldx22_t, theta, u1, &ldu1_t, u2, &ldu2_t, v1t, - &ldv1t_t, v2t, &ldv2t_t, work, &lwork, rwork, - &lrwork, iwork, &info ); - return (info < 0) ? (info - 1) : info; - } - /* Allocate memory for temporary array(s) */ - x11_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx11_t * MAX(1,q) ); - if( x11_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - x12_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx12_t * MAX(1,m-q) ); - if( x12_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_1; - } - x21_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx21_t * MAX(1,q) ); - if( x21_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_2; + /* LAPACK function works with matrices in both layouts. It is supported + * through TRANS parameter. So all conversion between layouts can be + * completed in LAPACK function. See the table below which describes how + * every LAPACKE call is forwarded to corresponding LAPACK call. + * + * matrix_layout | trans_LAPACKE | -> trans_LAPACK + * | (trans) | (ltrans) + * -----------------+---------------+---------------- + * LAPACK_COL_MAJOR | 'N' | -> 'N' + * LAPACK_COL_MAJOR | 'T' | -> 'T' + * LAPACK_ROW_MAJOR | 'N' | -> 'T' + * LAPACK_ROW_MAJOR | 'T' | -> 'T' + * (note that for row major layout trans parameter is ignored) + */ + if( matrix_layout == LAPACK_COL_MAJOR || + matrix_layout == LAPACK_ROW_MAJOR ) { + char ltrans; + if( !LAPACKE_lsame( trans, 't' ) && matrix_layout == LAPACK_COL_MAJOR ) { + ltrans = 'n'; + } else { + ltrans = 't'; } - x22_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldx22_t * MAX(1,m-q) ); - if( x22_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_3; - } - if( LAPACKE_lsame( jobu1, 'y' ) ) { - u1_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldu1_t * MAX(1,p) ); - if( u1_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_4; - } - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - u2_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldu2_t * MAX(1,m-p) ); - if( u2_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_5; - } - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - v1t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldv1t_t * MAX(1,q) ); - if( v1t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_6; - } - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - v2t_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * - ldv2t_t * MAX(1,m-q) ); - if( v2t_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_7; - } - } - /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_layout, nrows_x11, q, x11, ldx11, x11_t, - ldx11_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x12, m-q, x12, ldx12, x12_t, - ldx12_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x21, q, x21, ldx21, x21_t, - ldx21_t ); - LAPACKE_zge_trans( matrix_layout, nrows_x22, m-q, x22, ldx22, x22_t, - ldx22_t ); /* Call LAPACK function and adjust info */ - LAPACK_zuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &signs, &m, &p, - &q, x11_t, &ldx11_t, x12_t, &ldx12_t, x21_t, &ldx21_t, - x22_t, &ldx22_t, theta, u1_t, &ldu1_t, u2_t, &ldu2_t, - v1t_t, &ldv1t_t, v2t_t, &ldv2t_t, work, &lwork, rwork, - &lrwork, iwork, &info ); + LAPACK_zuncsd( &jobu1, &jobu2, &jobv1t, &jobv2t, <rans, &signs, &m, + &p, &q, x11, &ldx11, x12, &ldx12, x21, &ldx21, x22, + &ldx22, theta, u1, &ldu1, u2, &ldu2, v1t, &ldv1t, v2t, + &ldv2t, work, &lwork, rwork, &lrwork, iwork, &info ); if( info < 0 ) { info = info - 1; } - /* Transpose output matrices */ - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x11, q, x11_t, ldx11_t, x11, - ldx11 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x12, m-q, x12_t, ldx12_t, - x12, ldx12 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x21, q, x21_t, ldx21_t, x21, - ldx21 ); - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_x22, m-q, x22_t, ldx22_t, - x22, ldx22 ); - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u1, p, u1_t, ldu1_t, u1, - ldu1 ); - } - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u2, m-p, u2_t, ldu2_t, - u2, ldu2 ); - } - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v1t, q, v1t_t, ldv1t_t, - v1t, ldv1t ); - } - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v2t, m-q, v2t_t, ldv2t_t, - v2t, ldv2t ); - } - /* Release memory and exit */ - if( LAPACKE_lsame( jobv2t, 'y' ) ) { - LAPACKE_free( v2t_t ); - } -exit_level_7: - if( LAPACKE_lsame( jobv1t, 'y' ) ) { - LAPACKE_free( v1t_t ); - } -exit_level_6: - if( LAPACKE_lsame( jobu2, 'y' ) ) { - LAPACKE_free( u2_t ); - } -exit_level_5: - if( LAPACKE_lsame( jobu1, 'y' ) ) { - LAPACKE_free( u1_t ); - } -exit_level_4: - LAPACKE_free( x22_t ); -exit_level_3: - LAPACKE_free( x21_t ); -exit_level_2: - LAPACKE_free( x12_t ); -exit_level_1: - LAPACKE_free( x11_t ); -exit_level_0: - if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { - LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); - } } else { info = -1; LAPACKE_xerbla( "LAPACKE_zuncsd_work", info ); |