63 files changed, 44789 insertions, 11 deletions
diff --git a/XBLAS/Makefile b/XBLAS/Makefile
index c43bc1b3..5a006bfd 100644
--- a/XBLAS/Makefile
+++ b/XBLAS/Makefile
@@ -11,10 +11,10 @@
 include make.conf
 include $(MAKEINC)
 
-LIB = dot-lib sum-lib axpby-lib waxpby-lib gemv-lib ge_sum_mv-lib gbmv-lib symv-lib spmv-lib sbmv-lib hemv-lib hpmv-lib hbmv-lib trmv-lib tpmv-lib trsv-lib tbsv-lib gemm-lib symm-lib hemm-lib gemv2-lib symv2-lib hemv2-lib
-TESTS = dot-test sum-test axpby-test waxpby-test gemv-test ge_sum_mv-test gbmv-test symv-test spmv-test sbmv-test hemv-test hpmv-test hbmv-test trmv-test tpmv-test trsv-test tbsv-test gemm-test symm-test hemm-test gemv2-test symv2-test hemv2-test
-TEST_LIB = dot-test-lib sum-test-lib axpby-test-lib waxpby-test-lib gemv-test-lib ge_sum_mv-test-lib gbmv-test-lib symv-test-lib spmv-test-lib sbmv-test-lib hemv-test-lib hpmv-test-lib hbmv-test-lib trmv-test-lib tpmv-test-lib trsv-test-lib tbsv-test-lib gemm-test-lib symm-test-lib hemm-test-lib gemv2-test-lib symv2-test-lib hemv2-test-lib
-CLEAN = dot-clean sum-clean axpby-clean waxpby-clean gemv-clean ge_sum_mv-clean gbmv-clean symv-clean spmv-clean sbmv-clean hemv-clean hpmv-clean hbmv-clean trmv-clean tpmv-clean trsv-clean tbsv-clean gemm-clean symm-clean hemm-clean gemv2-clean symv2-clean hemv2-clean dot2-clean
+LIB = dot-lib sum-lib axpby-lib waxpby-lib gemv-lib ge_sum_mv-lib gbmv-lib symv-lib spmv-lib sbmv-lib hemv-lib hpmv-lib hbmv-lib trmv-lib tpmv-lib trsv-lib tbsv-lib gemm-lib symm-lib hemm-lib gemv2-lib symv2-lib hemv2-lib gbmv2-lib
+TESTS = dot-test sum-test axpby-test waxpby-test gemv-test ge_sum_mv-test gbmv-test symv-test spmv-test sbmv-test hemv-test hpmv-test hbmv-test trmv-test tpmv-test trsv-test tbsv-test gemm-test symm-test hemm-test gemv2-test symv2-test hemv2-test gbmv2-test
+TEST_LIB = dot-test-lib sum-test-lib axpby-test-lib waxpby-test-lib gemv-test-lib ge_sum_mv-test-lib gbmv-test-lib symv-test-lib spmv-test-lib sbmv-test-lib hemv-test-lib hpmv-test-lib hbmv-test-lib trmv-test-lib tpmv-test-lib trsv-test-lib tbsv-test-lib gemm-test-lib symm-test-lib hemm-test-lib gemv2-test-lib symv2-test-lib hemv2-test-lib gbmv2-test-lib
+CLEAN = dot-clean sum-clean axpby-clean waxpby-clean gemv-clean ge_sum_mv-clean gbmv-clean symv-clean spmv-clean sbmv-clean hemv-clean hpmv-clean hbmv-clean trmv-clean tpmv-clean trsv-clean tbsv-clean gemm-clean symm-clean hemm-clean gemv2-clean symv2-clean hemv2-clean gbmv2-clean dot2-clean
 
 SRC_DIR = src
 TEST_DIR = testing
@@ -52,6 +52,7 @@ tests: test-lib $(TESTS)
 	cat $(TEST_DIR)/test-gemv2/gemv2.results >> testall.result
 	cat $(TEST_DIR)/test-symv2/symv2.results >> testall.result
 	cat $(TEST_DIR)/test-hemv2/hemv2.results >> testall.result
+	cat $(TEST_DIR)/test-gbmv2/gbmv2.results >> testall.result
 	grep 'FAIL/TOTAL' testall.result >testall.summary
 	cat testall.summary
 
@@ -86,6 +87,7 @@ lib: $(LIB)
 	cd $(SRC_DIR)/gemv2 && $(MAKE) lib
 	cd $(SRC_DIR)/symv2 && $(MAKE) lib
 	cd $(SRC_DIR)/hemv2 && $(MAKE) lib
+	cd $(SRC_DIR)/gbmv2 && $(MAKE) lib
 
 # custom test-dot2 stuff
 
@@ -536,6 +538,25 @@ hemv2-clean:
 	cd $(TEST_DIR)/test-hemv2 && $(MAKE) clean
 
 
+# gbmv2 stuff
+
+gbmv2: gbmv2-test
+
+
+gbmv2-lib: common-lib
+	cd $(SRC_DIR)/gbmv2 && $(MAKE)
+
+gbmv2-test-lib: lib common-test-lib
+	cd $(TEST_DIR)/test-gbmv2 && $(MAKE) do_test_gbmv2 
+
+gbmv2-test: gbmv2-test-lib
+	cd $(TEST_DIR)/test-gbmv2 && $(MAKE)
+
+gbmv2-clean:
+	cd $(SRC_DIR)/gbmv2 && $(MAKE) clean
+	cd $(TEST_DIR)/test-gbmv2 && $(MAKE) clean
+
+
 
 # Test library dependencies
 sum-test-lib: dot-test-lib
diff --git a/XBLAS/src/blas_extended_proto.h b/XBLAS/src/blas_extended_proto.h
index ba1497a0..3bdc3fde 100644
--- a/XBLAS/src/blas_extended_proto.h
+++ b/XBLAS/src/blas_extended_proto.h
@@ -1968,6 +1968,148 @@ void BLAS_zhemv2_z_d_x(enum blas_order_type order, enum blas_uplo_type uplo,
 		       enum blas_prec_type prec);
 
 
+void BLAS_dgbmv2_d_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const double *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, double beta,
+		     double *y, int incy);
+void BLAS_dgbmv2_s_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const float *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, double beta,
+		     double *y, int incy);
+void BLAS_dgbmv2_s_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const float *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, double beta,
+		     double *y, int incy);
+void BLAS_zgbmv2_z_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_zgbmv2_c_z(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_zgbmv2_c_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_cgbmv2_c_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_cgbmv2_s_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const float *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_cgbmv2_s_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const float *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_zgbmv2_z_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_zgbmv2_d_z(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const double *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_zgbmv2_d_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const double *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+void BLAS_sgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, float alpha,
+		   const float *a, int lda, const float *head_x,
+		   const float *tail_x, int incx, float beta,
+		   float *y, int incy, enum blas_prec_type prec);
+void BLAS_dgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, double alpha,
+		   const double *a, int lda, const double *head_x,
+		   const double *tail_x, int incx, double beta,
+		   double *y, int incy, enum blas_prec_type prec);
+void BLAS_cgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, const void *alpha,
+		   const void *a, int lda, const void *head_x,
+		   const void *tail_x, int incx, const void *beta,
+		   void *y, int incy, enum blas_prec_type prec);
+void BLAS_zgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, const void *alpha,
+		   const void *a, int lda, const void *head_x,
+		   const void *tail_x, int incx, const void *beta,
+		   void *y, int incy, enum blas_prec_type prec);
+void BLAS_dgbmv2_d_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const double *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec);
+void BLAS_dgbmv2_s_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const float *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec);
+void BLAS_dgbmv2_s_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const float *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec);
+void BLAS_zgbmv2_z_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+void BLAS_zgbmv2_c_z_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+void BLAS_zgbmv2_c_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+void BLAS_cgbmv2_c_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+void BLAS_cgbmv2_s_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const float *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+void BLAS_cgbmv2_s_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const float *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+void BLAS_zgbmv2_z_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+void BLAS_zgbmv2_d_z_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const double *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+void BLAS_zgbmv2_d_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const double *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
 
 int BLAS_fpinfo_x(enum blas_cmach_type cmach, enum blas_prec_type prec);
 void BLAS_error(const char *rname, int iflag, int ival, char *form, ...);
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s-f2c.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s-f2c.c
new file mode 100644
index 00000000..5f4f728a
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_cgbmv2_c_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_cgbmv2_c_s, BLAS_CGBMV2_C_S)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const float *head_x, const float *tail_x,
+   int *incx, const void *beta, void *y, int *incy) {
+  BLAS_cgbmv2_c_s(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s.c
new file mode 100644
index 00000000..3787bdba
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s.c
@@ -0,0 +1,303 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_cgbmv2_c_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_cgbmv2_c_s";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  float *y_i = (float *) y;
+  const float *a_i = (float *) a;
+  const float *head_x_i = head_x;
+  const float *tail_x_i = tail_x;
+  float *alpha_i = (float *) alpha;
+  float *beta_i = (float *) beta;
+  float tmp1[2];
+  float tmp2[2];
+  float tmp3[2];
+  float tmp4[2];
+  float result[2];
+  float sum1[2];
+  float sum2[2];
+  float prod[2];
+  float a_elem[2];
+  float x_elem;
+  float y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+
+  incy *= 2;
+  incaij *= 2;
+  incai1 *= 2;
+  incai2 *= 2;
+  astart *= 2;
+  iy0 *= 2;
+
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1[0] = sum1[1] = 0.0;
+    sum2[0] = sum2[1] = 0.0;
+    aij = ai;
+    jx = ix0;
+    if (trans != blas_conj_trans) {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem = head_x_i[jx];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	{
+	  prod[0] = a_elem[0] * x_elem;
+	  prod[1] = a_elem[1] * x_elem;
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem = tail_x_i[jx];
+	{
+	  prod[0] = a_elem[0] * x_elem;
+	  prod[1] = a_elem[1] * x_elem;
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+
+    } else {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem = head_x_i[jx];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	a_elem[1] = -a_elem[1];
+	{
+	  prod[0] = a_elem[0] * x_elem;
+	  prod[1] = a_elem[1] * x_elem;
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem = tail_x_i[jx];
+	{
+	  prod[0] = a_elem[0] * x_elem;
+	  prod[1] = a_elem[1] * x_elem;
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+    }
+
+    {
+      tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
+      tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
+    }
+
+    {
+      tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1];
+      tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0];
+    }
+
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
+      tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
+    }
+
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_cgbmv2_c_s */
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s_x-f2c.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s_x-f2c.c
new file mode 100644
index 00000000..9f95e1ff
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_cgbmv2_c_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_cgbmv2_c_s_x, BLAS_CGBMV2_C_S_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const float *head_x, const float *tail_x,
+   int *incx, const void *beta, void *y, int *incy, int *prec) {
+  BLAS_cgbmv2_c_s_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s_x.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s_x.c
new file mode 100644
index 00000000..d4411041
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_c_s_x.c
@@ -0,0 +1,1516 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_cgbmv2_c_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_cgbmv2_c_s_x";
+
+  switch (prec) {
+  case blas_prec_single:{
+
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = (float *) a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      float tmp1[2];
+      float tmp2[2];
+      float tmp3[2];
+      float tmp4[2];
+      float result[2];
+      float sum1[2];
+      float sum2[2];
+      float prod[2];
+      float a_elem[2];
+      float x_elem;
+      float y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] = a_elem[0] * x_elem;
+	      prod[1] = a_elem[1] * x_elem;
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem = tail_x_i[jx];
+	    {
+	      prod[0] = a_elem[0] * x_elem;
+	      prod[1] = a_elem[1] * x_elem;
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] = a_elem[0] * x_elem;
+	      prod[1] = a_elem[1] * x_elem;
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem = tail_x_i[jx];
+	    {
+	      prod[0] = a_elem[0] * x_elem;
+	      prod[1] = a_elem[1] * x_elem;
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
+	  tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
+	}
+
+	{
+	  tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1];
+	  tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0];
+	}
+
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
+	  tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
+	}
+
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+
+      break;
+    }
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = (float *) a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      float result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      float a_elem[2];
+      float x_elem;
+      float y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] = (double) a_elem[0] * x_elem;
+	      prod[1] = (double) a_elem[1] * x_elem;
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem = tail_x_i[jx];
+	    {
+	      prod[0] = (double) a_elem[0] * x_elem;
+	      prod[1] = (double) a_elem[1] * x_elem;
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] = (double) a_elem[0] * x_elem;
+	      prod[1] = (double) a_elem[1] * x_elem;
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem = tail_x_i[jx];
+	    {
+	      prod[0] = (double) a_elem[0] * x_elem;
+	      prod[1] = (double) a_elem[1] * x_elem;
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = (float *) a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      float result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      float a_elem[2];
+      float x_elem;
+      float y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      head_prod[0] = (double) a_elem[0] * x_elem;
+	      tail_prod[0] = 0.0;
+	      head_prod[1] = (double) a_elem[1] * x_elem;
+	      tail_prod[1] = 0.0;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem = tail_x_i[jx];
+	    {
+	      head_prod[0] = (double) a_elem[0] * x_elem;
+	      tail_prod[0] = 0.0;
+	      head_prod[1] = (double) a_elem[1] * x_elem;
+	      tail_prod[1] = 0.0;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      head_prod[0] = (double) a_elem[0] * x_elem;
+	      tail_prod[0] = 0.0;
+	      head_prod[1] = (double) a_elem[1] * x_elem;
+	      tail_prod[1] = 0.0;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem = tail_x_i[jx];
+	    {
+	      head_prod[0] = (double) a_elem[0] * x_elem;
+	      tail_prod[0] = 0.0;
+	      head_prod[1] = (double) a_elem[1] * x_elem;
+	      tail_prod[1] = 0.0;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  double cd[2];
+	  cd[0] = (double) alpha_i[0];
+	  cd[1] = (double) alpha_i[1];
+	  {
+	    /* Compute complex-extra = complex-extra * complex-double. */
+	    double head_a0, tail_a0;
+	    double head_a1, tail_a1;
+	    double head_t1, tail_t1;
+	    double head_t2, tail_t2;
+	    head_a0 = head_sum1[0];
+	    tail_a0 = tail_sum1[0];
+	    head_a1 = head_sum1[1];
+	    tail_a1 = tail_sum1[1];
+	    /* real part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a0 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a1 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    head_t2 = -head_t2;
+	    tail_t2 = -tail_t2;
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp1[0] = head_t1;
+	    tail_tmp1[0] = tail_t1;
+	    /* imaginary part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a1 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a0 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp1[1] = head_t1;
+	    tail_tmp1[1] = tail_t1;
+	  }
+
+	}
+	{
+	  double cd[2];
+	  cd[0] = (double) alpha_i[0];
+	  cd[1] = (double) alpha_i[1];
+	  {
+	    /* Compute complex-extra = complex-extra * complex-double. */
+	    double head_a0, tail_a0;
+	    double head_a1, tail_a1;
+	    double head_t1, tail_t1;
+	    double head_t2, tail_t2;
+	    head_a0 = head_sum2[0];
+	    tail_a0 = tail_sum2[0];
+	    head_a1 = head_sum2[1];
+	    tail_a1 = tail_sum2[1];
+	    /* real part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a0 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a1 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    head_t2 = -head_t2;
+	    tail_t2 = -tail_t2;
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp2[0] = head_t1;
+	    tail_tmp2[0] = tail_t1;
+	    /* imaginary part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a1 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a0 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp2[1] = head_t1;
+	    tail_tmp2[1] = tail_t1;
+	  }
+
+	}
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  double head_e1, tail_e1;
+	  double d1;
+	  double d2;
+	  /* Real part */
+	  d1 = (double) beta_i[0] * y_elem[0];
+	  d2 = (double) -beta_i[1] * y_elem[1];
+	  {
+	    /* Compute double-double = double + double. */
+	    double e, t1, t2;
+
+	    /* Knuth trick. */
+	    t1 = d1 + d2;
+	    e = t1 - d1;
+	    t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+	    /* The result is t1 + t2, after normalization. */
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp4[0] = head_e1;
+	  tail_tmp4[0] = tail_e1;
+	  /* imaginary part */
+	  d1 = (double) beta_i[0] * y_elem[1];
+	  d2 = (double) beta_i[1] * y_elem[0];
+	  {
+	    /* Compute double-double = double + double. */
+	    double e, t1, t2;
+
+	    /* Knuth trick. */
+	    t1 = d1 + d2;
+	    e = t1 - d1;
+	    t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+	    /* The result is t1 + t2, after normalization. */
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp4[1] = head_e1;
+	  tail_tmp4[1] = tail_e1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_cgbmv2_c_s_x */
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c-f2c.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c-f2c.c
new file mode 100644
index 00000000..45b6160a
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_cgbmv2_s_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const float *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_cgbmv2_s_c, BLAS_CGBMV2_S_C)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const float *a, int *lda, const void *head_x, const void *tail_x,
+   int *incx, const void *beta, void *y, int *incy) {
+  BLAS_cgbmv2_s_c(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c.c
new file mode 100644
index 00000000..f539a878
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c.c
@@ -0,0 +1,281 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_cgbmv2_s_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const float *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_cgbmv2_s_c";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  float *y_i = (float *) y;
+  const float *a_i = a;
+  const float *head_x_i = (float *) head_x;
+  const float *tail_x_i = (float *) tail_x;
+  float *alpha_i = (float *) alpha;
+  float *beta_i = (float *) beta;
+  float tmp1[2];
+  float tmp2[2];
+  float tmp3[2];
+  float tmp4[2];
+  float result[2];
+  float sum1[2];
+  float sum2[2];
+  float prod[2];
+  float a_elem;
+  float x_elem[2];
+  float y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+  incx *= 2;
+  incy *= 2;
+
+
+
+
+  iy0 *= 2;
+  ix0 *= 2;
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1[0] = sum1[1] = 0.0;
+    sum2[0] = sum2[1] = 0.0;
+    aij = ai;
+    jx = ix0;
+
+    for (j = ra - la; j >= 0; j--) {
+      x_elem[0] = head_x_i[jx];
+      x_elem[1] = head_x_i[jx + 1];
+      a_elem = a_i[aij];
+      {
+	prod[0] = x_elem[0] * a_elem;
+	prod[1] = x_elem[1] * a_elem;
+      }
+      sum1[0] = sum1[0] + prod[0];
+      sum1[1] = sum1[1] + prod[1];
+      x_elem[0] = tail_x_i[jx];
+      x_elem[1] = tail_x_i[jx + 1];
+      {
+	prod[0] = x_elem[0] * a_elem;
+	prod[1] = x_elem[1] * a_elem;
+      }
+      sum2[0] = sum2[0] + prod[0];
+      sum2[1] = sum2[1] + prod[1];
+      aij += incaij;
+      jx += incx;
+    }
+
+
+    {
+      tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
+      tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
+    }
+
+    {
+      tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1];
+      tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0];
+    }
+
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
+      tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
+    }
+
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_cgbmv2_s_c */
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c_x-f2c.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c_x-f2c.c
new file mode 100644
index 00000000..7d484f5a
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_cgbmv2_s_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const float *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_cgbmv2_s_c_x, BLAS_CGBMV2_S_C_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const float *a, int *lda, const void *head_x, const void *tail_x,
+   int *incx, const void *beta, void *y, int *incy, int *prec) {
+  BLAS_cgbmv2_s_c_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c_x.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c_x.c
new file mode 100644
index 00000000..4a570797
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_c_x.c
@@ -0,0 +1,1304 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_cgbmv2_s_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const float *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_cgbmv2_s_c_x";
+
+  switch (prec) {
+  case blas_prec_single:{
+
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      float tmp1[2];
+      float tmp2[2];
+      float tmp3[2];
+      float tmp4[2];
+      float result[2];
+      float sum1[2];
+      float sum2[2];
+      float prod[2];
+      float a_elem;
+      float x_elem[2];
+      float y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem[0] = head_x_i[jx];
+	  x_elem[1] = head_x_i[jx + 1];
+	  a_elem = a_i[aij];
+	  {
+	    prod[0] = x_elem[0] * a_elem;
+	    prod[1] = x_elem[1] * a_elem;
+	  }
+	  sum1[0] = sum1[0] + prod[0];
+	  sum1[1] = sum1[1] + prod[1];
+	  x_elem[0] = tail_x_i[jx];
+	  x_elem[1] = tail_x_i[jx + 1];
+	  {
+	    prod[0] = x_elem[0] * a_elem;
+	    prod[1] = x_elem[1] * a_elem;
+	  }
+	  sum2[0] = sum2[0] + prod[0];
+	  sum2[1] = sum2[1] + prod[1];
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
+	  tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
+	}
+
+	{
+	  tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1];
+	  tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0];
+	}
+
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
+	  tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
+	}
+
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+
+      break;
+    }
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      float result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      float a_elem;
+      float x_elem[2];
+      float y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem[0] = head_x_i[jx];
+	  x_elem[1] = head_x_i[jx + 1];
+	  a_elem = a_i[aij];
+	  {
+	    prod[0] = (double) x_elem[0] * a_elem;
+	    prod[1] = (double) x_elem[1] * a_elem;
+	  }
+	  sum1[0] = sum1[0] + prod[0];
+	  sum1[1] = sum1[1] + prod[1];
+	  x_elem[0] = tail_x_i[jx];
+	  x_elem[1] = tail_x_i[jx + 1];
+	  {
+	    prod[0] = (double) x_elem[0] * a_elem;
+	    prod[1] = (double) x_elem[1] * a_elem;
+	  }
+	  sum2[0] = sum2[0] + prod[0];
+	  sum2[1] = sum2[1] + prod[1];
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      float result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      float a_elem;
+      float x_elem[2];
+      float y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem[0] = head_x_i[jx];
+	  x_elem[1] = head_x_i[jx + 1];
+	  a_elem = a_i[aij];
+	  {
+	    head_prod[0] = (double) x_elem[0] * a_elem;
+	    tail_prod[0] = 0.0;
+	    head_prod[1] = (double) x_elem[1] * a_elem;
+	    tail_prod[1] = 0.0;
+	  }
+	  {
+	    double head_t, tail_t;
+	    double head_a, tail_a;
+	    double head_b, tail_b;
+	    /* Real part */
+	    head_a = head_sum1[0];
+	    tail_a = tail_sum1[0];
+	    head_b = head_prod[0];
+	    tail_b = tail_prod[0];
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_a + head_b;
+	      bv = s1 - head_a;
+	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_a + tail_b;
+	      bv = t1 - tail_a;
+	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t = t1 + t2;
+	      tail_t = t2 - (head_t - t1);
+	    }
+	    head_sum1[0] = head_t;
+	    tail_sum1[0] = tail_t;
+	    /* Imaginary part */
+	    head_a = head_sum1[1];
+	    tail_a = tail_sum1[1];
+	    head_b = head_prod[1];
+	    tail_b = tail_prod[1];
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_a + head_b;
+	      bv = s1 - head_a;
+	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_a + tail_b;
+	      bv = t1 - tail_a;
+	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t = t1 + t2;
+	      tail_t = t2 - (head_t - t1);
+	    }
+	    head_sum1[1] = head_t;
+	    tail_sum1[1] = tail_t;
+	  }
+	  x_elem[0] = tail_x_i[jx];
+	  x_elem[1] = tail_x_i[jx + 1];
+	  {
+	    head_prod[0] = (double) x_elem[0] * a_elem;
+	    tail_prod[0] = 0.0;
+	    head_prod[1] = (double) x_elem[1] * a_elem;
+	    tail_prod[1] = 0.0;
+	  }
+	  {
+	    double head_t, tail_t;
+	    double head_a, tail_a;
+	    double head_b, tail_b;
+	    /* Real part */
+	    head_a = head_sum2[0];
+	    tail_a = tail_sum2[0];
+	    head_b = head_prod[0];
+	    tail_b = tail_prod[0];
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_a + head_b;
+	      bv = s1 - head_a;
+	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_a + tail_b;
+	      bv = t1 - tail_a;
+	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t = t1 + t2;
+	      tail_t = t2 - (head_t - t1);
+	    }
+	    head_sum2[0] = head_t;
+	    tail_sum2[0] = tail_t;
+	    /* Imaginary part */
+	    head_a = head_sum2[1];
+	    tail_a = tail_sum2[1];
+	    head_b = head_prod[1];
+	    tail_b = tail_prod[1];
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_a + head_b;
+	      bv = s1 - head_a;
+	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_a + tail_b;
+	      bv = t1 - tail_a;
+	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t = t1 + t2;
+	      tail_t = t2 - (head_t - t1);
+	    }
+	    head_sum2[1] = head_t;
+	    tail_sum2[1] = tail_t;
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  double cd[2];
+	  cd[0] = (double) alpha_i[0];
+	  cd[1] = (double) alpha_i[1];
+	  {
+	    /* Compute complex-extra = complex-extra * complex-double. */
+	    double head_a0, tail_a0;
+	    double head_a1, tail_a1;
+	    double head_t1, tail_t1;
+	    double head_t2, tail_t2;
+	    head_a0 = head_sum1[0];
+	    tail_a0 = tail_sum1[0];
+	    head_a1 = head_sum1[1];
+	    tail_a1 = tail_sum1[1];
+	    /* real part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a0 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a1 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    head_t2 = -head_t2;
+	    tail_t2 = -tail_t2;
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp1[0] = head_t1;
+	    tail_tmp1[0] = tail_t1;
+	    /* imaginary part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a1 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a0 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp1[1] = head_t1;
+	    tail_tmp1[1] = tail_t1;
+	  }
+
+	}
+	{
+	  double cd[2];
+	  cd[0] = (double) alpha_i[0];
+	  cd[1] = (double) alpha_i[1];
+	  {
+	    /* Compute complex-extra = complex-extra * complex-double. */
+	    double head_a0, tail_a0;
+	    double head_a1, tail_a1;
+	    double head_t1, tail_t1;
+	    double head_t2, tail_t2;
+	    head_a0 = head_sum2[0];
+	    tail_a0 = tail_sum2[0];
+	    head_a1 = head_sum2[1];
+	    tail_a1 = tail_sum2[1];
+	    /* real part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a0 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a1 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    head_t2 = -head_t2;
+	    tail_t2 = -tail_t2;
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp2[0] = head_t1;
+	    tail_tmp2[0] = tail_t1;
+	    /* imaginary part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a1 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a0 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp2[1] = head_t1;
+	    tail_tmp2[1] = tail_t1;
+	  }
+
+	}
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  double head_e1, tail_e1;
+	  double d1;
+	  double d2;
+	  /* Real part */
+	  d1 = (double) beta_i[0] * y_elem[0];
+	  d2 = (double) -beta_i[1] * y_elem[1];
+	  {
+	    /* Compute double-double = double + double. */
+	    double e, t1, t2;
+
+	    /* Knuth trick. */
+	    t1 = d1 + d2;
+	    e = t1 - d1;
+	    t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+	    /* The result is t1 + t2, after normalization. */
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp4[0] = head_e1;
+	  tail_tmp4[0] = tail_e1;
+	  /* imaginary part */
+	  d1 = (double) beta_i[0] * y_elem[1];
+	  d2 = (double) beta_i[1] * y_elem[0];
+	  {
+	    /* Compute double-double = double + double. */
+	    double e, t1, t2;
+
+	    /* Knuth trick. */
+	    t1 = d1 + d2;
+	    e = t1 - d1;
+	    t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+	    /* The result is t1 + t2, after normalization. */
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp4[1] = head_e1;
+	  tail_tmp4[1] = tail_e1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_cgbmv2_s_c_x */
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s-f2c.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s-f2c.c
new file mode 100644
index 00000000..fddb2b00
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_cgbmv2_s_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const float *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_cgbmv2_s_s, BLAS_CGBMV2_S_S)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const float *a, int *lda, const float *head_x, const float *tail_x,
+   int *incx, const void *beta, void *y, int *incy) {
+  BLAS_cgbmv2_s_s(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s.c
new file mode 100644
index 00000000..8e326907
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s.c
@@ -0,0 +1,269 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_cgbmv2_s_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const float *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_cgbmv2_s_s";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  float *y_i = (float *) y;
+  const float *a_i = a;
+  const float *head_x_i = head_x;
+  const float *tail_x_i = tail_x;
+  float *alpha_i = (float *) alpha;
+  float *beta_i = (float *) beta;
+  float tmp1[2];
+  float tmp2[2];
+  float tmp3[2];
+  float tmp4[2];
+  float result[2];
+  float sum1;
+  float sum2;
+  float prod;
+  float a_elem;
+  float x_elem;
+  float y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+
+  incy *= 2;
+
+
+
+
+  iy0 *= 2;
+
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1 = 0.0;
+    sum2 = 0.0;
+    aij = ai;
+    jx = ix0;
+
+    for (j = ra - la; j >= 0; j--) {
+      x_elem = head_x_i[jx];
+      a_elem = a_i[aij];
+      prod = x_elem * a_elem;
+      sum1 = sum1 + prod;
+      x_elem = tail_x_i[jx];
+      prod = x_elem * a_elem;
+      sum2 = sum2 + prod;
+      aij += incaij;
+      jx += incx;
+    }
+
+
+    {
+      tmp1[0] = alpha_i[0] * sum1;
+      tmp1[1] = alpha_i[1] * sum1;
+    }
+    {
+      tmp2[0] = alpha_i[0] * sum2;
+      tmp2[1] = alpha_i[1] * sum2;
+    }
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
+      tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
+    }
+
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_cgbmv2_s_s */
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s_x-f2c.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s_x-f2c.c
new file mode 100644
index 00000000..f05811d8
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_cgbmv2_s_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const float *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_cgbmv2_s_s_x, BLAS_CGBMV2_S_S_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const float *a, int *lda, const float *head_x, const float *tail_x,
+   int *incx, const void *beta, void *y, int *incy, int *prec) {
+  BLAS_cgbmv2_s_s_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s_x.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s_x.c
new file mode 100644
index 00000000..0c7c8770
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_s_s_x.c
@@ -0,0 +1,946 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_cgbmv2_s_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const float *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_cgbmv2_s_s_x";
+
+  switch (prec) {
+  case blas_prec_single:{
+
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      float tmp1[2];
+      float tmp2[2];
+      float tmp3[2];
+      float tmp4[2];
+      float result[2];
+      float sum1;
+      float sum2;
+      float prod;
+      float a_elem;
+      float x_elem;
+      float y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = x_elem * a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = x_elem * a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  tmp1[0] = alpha_i[0] * sum1;
+	  tmp1[1] = alpha_i[1] * sum1;
+	}
+	{
+	  tmp2[0] = alpha_i[0] * sum2;
+	  tmp2[1] = alpha_i[1] * sum2;
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
+	  tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
+	}
+
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+
+      break;
+    }
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      float result[2];
+      double sum1;
+      double sum2;
+      double prod;
+      float a_elem;
+      float x_elem;
+      float y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = (double) x_elem *a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = (double) x_elem *a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  tmp1[0] = alpha_i[0] * sum1;
+	  tmp1[1] = alpha_i[1] * sum1;
+	}
+	{
+	  tmp2[0] = alpha_i[0] * sum2;
+	  tmp2[1] = alpha_i[1] * sum2;
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      float result[2];
+      double head_sum1, tail_sum1;
+      double head_sum2, tail_sum2;
+      double head_prod, tail_prod;
+      float a_elem;
+      float x_elem;
+      float y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1 = tail_sum1 = 0.0;
+	head_sum2 = tail_sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  head_prod = (double) x_elem *a_elem;
+	  tail_prod = 0.0;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum1 + head_prod;
+	    bv = s1 - head_sum1;
+	    s2 = ((head_prod - bv) + (head_sum1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum1 + tail_prod;
+	    bv = t1 - tail_sum1;
+	    t2 = ((tail_prod - bv) + (tail_sum1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum1 = t1 + t2;
+	    tail_sum1 = t2 - (head_sum1 - t1);
+	  }
+	  x_elem = tail_x_i[jx];
+	  head_prod = (double) x_elem *a_elem;
+	  tail_prod = 0.0;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum2 + head_prod;
+	    bv = s1 - head_sum2;
+	    s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum2 + tail_prod;
+	    bv = t1 - tail_sum2;
+	    t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum2 = t1 + t2;
+	    tail_sum2 = t2 - (head_sum2 - t1);
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  double head_e1, tail_e1;
+	  double dt;
+	  dt = (double) alpha_i[0];
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum1 * split;
+	    a11 = con - head_sum1;
+	    a11 = con - a11;
+	    a21 = head_sum1 - a11;
+	    con = dt * split;
+	    b1 = con - dt;
+	    b1 = con - b1;
+	    b2 = dt - b1;
+
+	    c11 = head_sum1 * dt;
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum1 * dt;
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp1[0] = head_e1;
+	  tail_tmp1[0] = tail_e1;
+	  dt = (double) alpha_i[1];
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum1 * split;
+	    a11 = con - head_sum1;
+	    a11 = con - a11;
+	    a21 = head_sum1 - a11;
+	    con = dt * split;
+	    b1 = con - dt;
+	    b1 = con - b1;
+	    b2 = dt - b1;
+
+	    c11 = head_sum1 * dt;
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum1 * dt;
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp1[1] = head_e1;
+	  tail_tmp1[1] = tail_e1;
+	}
+	{
+	  double head_e1, tail_e1;
+	  double dt;
+	  dt = (double) alpha_i[0];
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum2 * split;
+	    a11 = con - head_sum2;
+	    a11 = con - a11;
+	    a21 = head_sum2 - a11;
+	    con = dt * split;
+	    b1 = con - dt;
+	    b1 = con - b1;
+	    b2 = dt - b1;
+
+	    c11 = head_sum2 * dt;
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum2 * dt;
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp2[0] = head_e1;
+	  tail_tmp2[0] = tail_e1;
+	  dt = (double) alpha_i[1];
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum2 * split;
+	    a11 = con - head_sum2;
+	    a11 = con - a11;
+	    a21 = head_sum2 - a11;
+	    con = dt * split;
+	    b1 = con - dt;
+	    b1 = con - b1;
+	    b2 = dt - b1;
+
+	    c11 = head_sum2 * dt;
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum2 * dt;
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp2[1] = head_e1;
+	  tail_tmp2[1] = tail_e1;
+	}
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  double head_e1, tail_e1;
+	  double d1;
+	  double d2;
+	  /* Real part */
+	  d1 = (double) beta_i[0] * y_elem[0];
+	  d2 = (double) -beta_i[1] * y_elem[1];
+	  {
+	    /* Compute double-double = double + double. */
+	    double e, t1, t2;
+
+	    /* Knuth trick. */
+	    t1 = d1 + d2;
+	    e = t1 - d1;
+	    t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+	    /* The result is t1 + t2, after normalization. */
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp4[0] = head_e1;
+	  tail_tmp4[0] = tail_e1;
+	  /* imaginary part */
+	  d1 = (double) beta_i[0] * y_elem[1];
+	  d2 = (double) beta_i[1] * y_elem[0];
+	  {
+	    /* Compute double-double = double + double. */
+	    double e, t1, t2;
+
+	    /* Knuth trick. */
+	    t1 = d1 + d2;
+	    e = t1 - d1;
+	    t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+	    /* The result is t1 + t2, after normalization. */
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp4[1] = head_e1;
+	  tail_tmp4[1] = tail_e1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_cgbmv2_s_s_x */
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_x-f2c.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_x-f2c.c
new file mode 100644
index 00000000..4f9fa6a1
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_cgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, const void *alpha,
+		   const void *a, int lda, const void *head_x,
+		   const void *tail_x, int incx, const void *beta,
+		   void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_cgbmv2_x, BLAS_CGBMV2_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const void *head_x, const void *tail_x, int *incx,
+   const void *beta, void *y, int *incy, int *prec) {
+  BLAS_cgbmv2_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		*ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy,
+		(enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_cgbmv2_x.c b/XBLAS/src/gbmv2/BLAS_cgbmv2_x.c
new file mode 100644
index 00000000..2985002f
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_cgbmv2_x.c
@@ -0,0 +1,1688 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_cgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, const void *alpha,
+		   const void *a, int lda, const void *head_x,
+		   const void *tail_x, int incx, const void *beta,
+		   void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_cgbmv2_x";
+
+  switch (prec) {
+  case blas_prec_single:{
+
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = (float *) a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      float tmp1[2];
+      float tmp2[2];
+      float tmp3[2];
+      float tmp4[2];
+      float result[2];
+      float sum1[2];
+      float sum2[2];
+      float prod[2];
+      float a_elem[2];
+      float x_elem[2];
+      float y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1];
+	      prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0];
+	    }
+
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1];
+	      prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0];
+	    }
+
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1];
+	      prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0];
+	    }
+
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1];
+	      prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0];
+	    }
+
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
+	  tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
+	}
+
+	{
+	  tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1];
+	  tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0];
+	}
+
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
+	  tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
+	}
+
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+
+      break;
+    }
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = (float *) a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      float result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      float a_elem[2];
+      float x_elem[2];
+      float y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = (float *) y;
+      const float *a_i = (float *) a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      float *alpha_i = (float *) alpha;
+      float *beta_i = (float *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      float result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      float a_elem[2];
+      float x_elem[2];
+      float y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      double head_e1, tail_e1;
+	      double d1;
+	      double d2;
+	      /* Real part */
+	      d1 = (double) x_elem[0] * a_elem[0];
+	      d2 = (double) -x_elem[1] * a_elem[1];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[0] = head_e1;
+	      tail_prod[0] = tail_e1;
+	      /* imaginary part */
+	      d1 = (double) x_elem[0] * a_elem[1];
+	      d2 = (double) x_elem[1] * a_elem[0];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[1] = head_e1;
+	      tail_prod[1] = tail_e1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      double head_e1, tail_e1;
+	      double d1;
+	      double d2;
+	      /* Real part */
+	      d1 = (double) x_elem[0] * a_elem[0];
+	      d2 = (double) -x_elem[1] * a_elem[1];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[0] = head_e1;
+	      tail_prod[0] = tail_e1;
+	      /* imaginary part */
+	      d1 = (double) x_elem[0] * a_elem[1];
+	      d2 = (double) x_elem[1] * a_elem[0];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[1] = head_e1;
+	      tail_prod[1] = tail_e1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      double head_e1, tail_e1;
+	      double d1;
+	      double d2;
+	      /* Real part */
+	      d1 = (double) x_elem[0] * a_elem[0];
+	      d2 = (double) -x_elem[1] * a_elem[1];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[0] = head_e1;
+	      tail_prod[0] = tail_e1;
+	      /* imaginary part */
+	      d1 = (double) x_elem[0] * a_elem[1];
+	      d2 = (double) x_elem[1] * a_elem[0];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[1] = head_e1;
+	      tail_prod[1] = tail_e1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      double head_e1, tail_e1;
+	      double d1;
+	      double d2;
+	      /* Real part */
+	      d1 = (double) x_elem[0] * a_elem[0];
+	      d2 = (double) -x_elem[1] * a_elem[1];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[0] = head_e1;
+	      tail_prod[0] = tail_e1;
+	      /* imaginary part */
+	      d1 = (double) x_elem[0] * a_elem[1];
+	      d2 = (double) x_elem[1] * a_elem[0];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[1] = head_e1;
+	      tail_prod[1] = tail_e1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  double cd[2];
+	  cd[0] = (double) alpha_i[0];
+	  cd[1] = (double) alpha_i[1];
+	  {
+	    /* Compute complex-extra = complex-extra * complex-double. */
+	    double head_a0, tail_a0;
+	    double head_a1, tail_a1;
+	    double head_t1, tail_t1;
+	    double head_t2, tail_t2;
+	    head_a0 = head_sum1[0];
+	    tail_a0 = tail_sum1[0];
+	    head_a1 = head_sum1[1];
+	    tail_a1 = tail_sum1[1];
+	    /* real part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a0 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a1 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    head_t2 = -head_t2;
+	    tail_t2 = -tail_t2;
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp1[0] = head_t1;
+	    tail_tmp1[0] = tail_t1;
+	    /* imaginary part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a1 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a0 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp1[1] = head_t1;
+	    tail_tmp1[1] = tail_t1;
+	  }
+
+	}
+	{
+	  double cd[2];
+	  cd[0] = (double) alpha_i[0];
+	  cd[1] = (double) alpha_i[1];
+	  {
+	    /* Compute complex-extra = complex-extra * complex-double. */
+	    double head_a0, tail_a0;
+	    double head_a1, tail_a1;
+	    double head_t1, tail_t1;
+	    double head_t2, tail_t2;
+	    head_a0 = head_sum2[0];
+	    tail_a0 = tail_sum2[0];
+	    head_a1 = head_sum2[1];
+	    tail_a1 = tail_sum2[1];
+	    /* real part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a0 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a1 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    head_t2 = -head_t2;
+	    tail_t2 = -tail_t2;
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp2[0] = head_t1;
+	    tail_tmp2[0] = tail_t1;
+	    /* imaginary part */
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a1 * split;
+	      a11 = con - head_a1;
+	      a11 = con - a11;
+	      a21 = head_a1 - a11;
+	      con = cd[0] * split;
+	      b1 = con - cd[0];
+	      b1 = con - b1;
+	      b2 = cd[0] - b1;
+
+	      c11 = head_a1 * cd[0];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a1 * cd[0];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double * double. */
+	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	      con = head_a0 * split;
+	      a11 = con - head_a0;
+	      a11 = con - a11;
+	      a21 = head_a0 - a11;
+	      con = cd[1] * split;
+	      b1 = con - cd[1];
+	      b1 = con - b1;
+	      b2 = cd[1] - b1;
+
+	      c11 = head_a0 * cd[1];
+	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	      c2 = tail_a0 * cd[1];
+	      t1 = c11 + c2;
+	      t2 = (c2 - (t1 - c11)) + c21;
+
+	      head_t2 = t1 + t2;
+	      tail_t2 = t2 - (head_t2 - t1);
+	    }
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_t1 + head_t2;
+	      bv = s1 - head_t1;
+	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_t1 + tail_t2;
+	      bv = t1 - tail_t1;
+	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t1 = t1 + t2;
+	      tail_t1 = t2 - (head_t1 - t1);
+	    }
+	    head_tmp2[1] = head_t1;
+	    tail_tmp2[1] = tail_t1;
+	  }
+
+	}
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  double head_e1, tail_e1;
+	  double d1;
+	  double d2;
+	  /* Real part */
+	  d1 = (double) beta_i[0] * y_elem[0];
+	  d2 = (double) -beta_i[1] * y_elem[1];
+	  {
+	    /* Compute double-double = double + double. */
+	    double e, t1, t2;
+
+	    /* Knuth trick. */
+	    t1 = d1 + d2;
+	    e = t1 - d1;
+	    t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+	    /* The result is t1 + t2, after normalization. */
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp4[0] = head_e1;
+	  tail_tmp4[0] = tail_e1;
+	  /* imaginary part */
+	  d1 = (double) beta_i[0] * y_elem[1];
+	  d2 = (double) beta_i[1] * y_elem[0];
+	  {
+	    /* Compute double-double = double + double. */
+	    double e, t1, t2;
+
+	    /* Knuth trick. */
+	    t1 = d1 + d2;
+	    e = t1 - d1;
+	    t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+	    /* The result is t1 + t2, after normalization. */
+	    head_e1 = t1 + t2;
+	    tail_e1 = t2 - (head_e1 - t1);
+	  }
+	  head_tmp4[1] = head_e1;
+	  tail_tmp4[1] = tail_e1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_cgbmv2_x */
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s-f2c.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s-f2c.c
new file mode 100644
index 00000000..52ed8941
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_dgbmv2_d_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const double *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, double beta,
+		     double *y, int incy);
+
+
+extern void FC_FUNC_(blas_dgbmv2_d_s, BLAS_DGBMV2_D_S)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, double *alpha,
+   const double *a, int *lda, const float *head_x, const float *tail_x,
+   int *incx, double *beta, double *y, int *incy) {
+  BLAS_dgbmv2_d_s(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, *alpha, a, *lda, head_x, tail_x, *incx, *beta, y,
+		  *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s.c
new file mode 100644
index 00000000..0c70c784
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s.c
@@ -0,0 +1,254 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_dgbmv2_d_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const double *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, double beta,
+		     double *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) double
+ *              
+ * AB           (input) double*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) double
+ *
+ * y            (input) const double*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_dgbmv2_d_s";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = y;
+  const double *a_i = a;
+  const float *head_x_i = head_x;
+  const float *tail_x_i = tail_x;
+  double alpha_i = alpha;
+  double beta_i = beta;
+  double tmp1;
+  double tmp2;
+  double tmp3;
+  double tmp4;
+  double result;
+  double sum1;
+  double sum2;
+  double prod;
+  double a_elem;
+  float x_elem;
+  double y_elem;
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i == 0.0) && (beta_i == 1.0))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+
+
+
+
+
+
+
+
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1 = 0.0;
+    sum2 = 0.0;
+    aij = ai;
+    jx = ix0;
+
+    for (j = ra - la; j >= 0; j--) {
+      x_elem = head_x_i[jx];
+      a_elem = a_i[aij];
+      prod = x_elem * a_elem;
+      sum1 = sum1 + prod;
+      x_elem = tail_x_i[jx];
+      prod = x_elem * a_elem;
+      sum2 = sum2 + prod;
+      aij += incaij;
+      jx += incx;
+    }
+
+
+    tmp1 = sum1 * alpha_i;
+    tmp2 = sum2 * alpha_i;
+    tmp3 = tmp1 + tmp2;
+    y_elem = y_i[iy];
+    tmp4 = beta_i * y_elem;
+    result = tmp4 + tmp3;
+    y_i[iy] = result;
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_dgbmv2_d_s */
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s_x-f2c.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s_x-f2c.c
new file mode 100644
index 00000000..bca44d58
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_dgbmv2_d_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const double *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_dgbmv2_d_s_x, BLAS_DGBMV2_D_S_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, double *alpha,
+   const double *a, int *lda, const float *head_x, const float *tail_x,
+   int *incx, double *beta, double *y, int *incy, int *prec) {
+  BLAS_dgbmv2_d_s_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, *alpha, a, *lda, head_x, tail_x, *incx, *beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s_x.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s_x.c
new file mode 100644
index 00000000..bd27105e
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_d_s_x.c
@@ -0,0 +1,620 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_dgbmv2_d_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const double *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) double
+ *              
+ * AB           (input) double*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) double
+ *
+ * y            (input) const double*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_dgbmv2_d_s_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = y;
+      const double *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      double alpha_i = alpha;
+      double beta_i = beta;
+      double tmp1;
+      double tmp2;
+      double tmp3;
+      double tmp4;
+      double result;
+      double sum1;
+      double sum2;
+      double prod;
+      double a_elem;
+      float x_elem;
+      double y_elem;
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = x_elem * a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = x_elem * a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	tmp1 = sum1 * alpha_i;
+	tmp2 = sum2 * alpha_i;
+	tmp3 = tmp1 + tmp2;
+	y_elem = y_i[iy];
+	tmp4 = beta_i * y_elem;
+	result = tmp4 + tmp3;
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = y;
+      const double *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      double alpha_i = alpha;
+      double beta_i = beta;
+      double head_tmp1, tail_tmp1;
+      double head_tmp2, tail_tmp2;
+      double head_tmp3, tail_tmp3;
+      double head_tmp4, tail_tmp4;
+      double result;
+      double head_sum1, tail_sum1;
+      double head_sum2, tail_sum2;
+      double head_prod, tail_prod;
+      double a_elem;
+      float x_elem;
+      double y_elem;
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1 = tail_sum1 = 0.0;
+	head_sum2 = tail_sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  {
+	    double dt = (double) x_elem;
+	    {
+	      /* Compute double_double = double * double. */
+	      double a1, a2, b1, b2, con;
+
+	      con = dt * split;
+	      a1 = con - dt;
+	      a1 = con - a1;
+	      a2 = dt - a1;
+	      con = a_elem * split;
+	      b1 = con - a_elem;
+	      b1 = con - b1;
+	      b2 = a_elem - b1;
+
+	      head_prod = dt * a_elem;
+	      tail_prod =
+		(((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
+	    }
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum1 + head_prod;
+	    bv = s1 - head_sum1;
+	    s2 = ((head_prod - bv) + (head_sum1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum1 + tail_prod;
+	    bv = t1 - tail_sum1;
+	    t2 = ((tail_prod - bv) + (tail_sum1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum1 = t1 + t2;
+	    tail_sum1 = t2 - (head_sum1 - t1);
+	  }
+	  x_elem = tail_x_i[jx];
+	  {
+	    double dt = (double) x_elem;
+	    {
+	      /* Compute double_double = double * double. */
+	      double a1, a2, b1, b2, con;
+
+	      con = dt * split;
+	      a1 = con - dt;
+	      a1 = con - a1;
+	      a2 = dt - a1;
+	      con = a_elem * split;
+	      b1 = con - a_elem;
+	      b1 = con - b1;
+	      b2 = a_elem - b1;
+
+	      head_prod = dt * a_elem;
+	      tail_prod =
+		(((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
+	    }
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum2 + head_prod;
+	    bv = s1 - head_sum2;
+	    s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum2 + tail_prod;
+	    bv = t1 - tail_sum2;
+	    t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum2 = t1 + t2;
+	    tail_sum2 = t2 - (head_sum2 - t1);
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  /* Compute double-double = double-double * double. */
+	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	  con = head_sum1 * split;
+	  a11 = con - head_sum1;
+	  a11 = con - a11;
+	  a21 = head_sum1 - a11;
+	  con = alpha_i * split;
+	  b1 = con - alpha_i;
+	  b1 = con - b1;
+	  b2 = alpha_i - b1;
+
+	  c11 = head_sum1 * alpha_i;
+	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	  c2 = tail_sum1 * alpha_i;
+	  t1 = c11 + c2;
+	  t2 = (c2 - (t1 - c11)) + c21;
+
+	  head_tmp1 = t1 + t2;
+	  tail_tmp1 = t2 - (head_tmp1 - t1);
+	}
+	{
+	  /* Compute double-double = double-double * double. */
+	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	  con = head_sum2 * split;
+	  a11 = con - head_sum2;
+	  a11 = con - a11;
+	  a21 = head_sum2 - a11;
+	  con = alpha_i * split;
+	  b1 = con - alpha_i;
+	  b1 = con - b1;
+	  b2 = alpha_i - b1;
+
+	  c11 = head_sum2 * alpha_i;
+	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	  c2 = tail_sum2 * alpha_i;
+	  t1 = c11 + c2;
+	  t2 = (c2 - (t1 - c11)) + c21;
+
+	  head_tmp2 = t1 + t2;
+	  tail_tmp2 = t2 - (head_tmp2 - t1);
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp1 + head_tmp2;
+	  bv = s1 - head_tmp1;
+	  s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp1 + tail_tmp2;
+	  bv = t1 - tail_tmp1;
+	  t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  head_tmp3 = t1 + t2;
+	  tail_tmp3 = t2 - (head_tmp3 - t1);
+	}
+	y_elem = y_i[iy];
+	{
+	  /* Compute double_double = double * double. */
+	  double a1, a2, b1, b2, con;
+
+	  con = beta_i * split;
+	  a1 = con - beta_i;
+	  a1 = con - a1;
+	  a2 = beta_i - a1;
+	  con = y_elem * split;
+	  b1 = con - y_elem;
+	  b1 = con - b1;
+	  b2 = y_elem - b1;
+
+	  head_tmp4 = beta_i * y_elem;
+	  tail_tmp4 = (((a1 * b1 - head_tmp4) + a1 * b2) + a2 * b1) + a2 * b2;
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp4 + head_tmp3;
+	  bv = s1 - head_tmp4;
+	  s2 = ((head_tmp3 - bv) + (head_tmp4 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp4 + tail_tmp3;
+	  bv = t1 - tail_tmp4;
+	  t2 = ((tail_tmp3 - bv) + (tail_tmp4 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  result = t1 + t2;
+	}
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_dgbmv2_d_s_x */
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d-f2c.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d-f2c.c
new file mode 100644
index 00000000..3fc22560
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_dgbmv2_s_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const float *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, double beta,
+		     double *y, int incy);
+
+
+extern void FC_FUNC_(blas_dgbmv2_s_d, BLAS_DGBMV2_S_D)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, double *alpha,
+   const float *a, int *lda, const double *head_x, const double *tail_x,
+   int *incx, double *beta, double *y, int *incy) {
+  BLAS_dgbmv2_s_d(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, *alpha, a, *lda, head_x, tail_x, *incx, *beta, y,
+		  *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d.c
new file mode 100644
index 00000000..8b840787
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d.c
@@ -0,0 +1,254 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_dgbmv2_s_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const float *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, double beta,
+		     double *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) double
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) double*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) double
+ *
+ * y            (input) const double*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_dgbmv2_s_d";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = y;
+  const float *a_i = a;
+  const double *head_x_i = head_x;
+  const double *tail_x_i = tail_x;
+  double alpha_i = alpha;
+  double beta_i = beta;
+  double tmp1;
+  double tmp2;
+  double tmp3;
+  double tmp4;
+  double result;
+  double sum1;
+  double sum2;
+  double prod;
+  float a_elem;
+  double x_elem;
+  double y_elem;
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i == 0.0) && (beta_i == 1.0))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+
+
+
+
+
+
+
+
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1 = 0.0;
+    sum2 = 0.0;
+    aij = ai;
+    jx = ix0;
+
+    for (j = ra - la; j >= 0; j--) {
+      x_elem = head_x_i[jx];
+      a_elem = a_i[aij];
+      prod = x_elem * a_elem;
+      sum1 = sum1 + prod;
+      x_elem = tail_x_i[jx];
+      prod = x_elem * a_elem;
+      sum2 = sum2 + prod;
+      aij += incaij;
+      jx += incx;
+    }
+
+
+    tmp1 = sum1 * alpha_i;
+    tmp2 = sum2 * alpha_i;
+    tmp3 = tmp1 + tmp2;
+    y_elem = y_i[iy];
+    tmp4 = beta_i * y_elem;
+    result = tmp4 + tmp3;
+    y_i[iy] = result;
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_dgbmv2_s_d */
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d_x-f2c.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d_x-f2c.c
new file mode 100644
index 00000000..e02f9fd5
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_dgbmv2_s_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const float *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_dgbmv2_s_d_x, BLAS_DGBMV2_S_D_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, double *alpha,
+   const float *a, int *lda, const double *head_x, const double *tail_x,
+   int *incx, double *beta, double *y, int *incy, int *prec) {
+  BLAS_dgbmv2_s_d_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, *alpha, a, *lda, head_x, tail_x, *incx, *beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d_x.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d_x.c
new file mode 100644
index 00000000..eda4da18
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_d_x.c
@@ -0,0 +1,620 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_dgbmv2_s_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const float *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) double
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) double*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) double
+ *
+ * y            (input) const double*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_dgbmv2_s_d_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = y;
+      const float *a_i = a;
+      const double *head_x_i = head_x;
+      const double *tail_x_i = tail_x;
+      double alpha_i = alpha;
+      double beta_i = beta;
+      double tmp1;
+      double tmp2;
+      double tmp3;
+      double tmp4;
+      double result;
+      double sum1;
+      double sum2;
+      double prod;
+      float a_elem;
+      double x_elem;
+      double y_elem;
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = x_elem * a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = x_elem * a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	tmp1 = sum1 * alpha_i;
+	tmp2 = sum2 * alpha_i;
+	tmp3 = tmp1 + tmp2;
+	y_elem = y_i[iy];
+	tmp4 = beta_i * y_elem;
+	result = tmp4 + tmp3;
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = y;
+      const float *a_i = a;
+      const double *head_x_i = head_x;
+      const double *tail_x_i = tail_x;
+      double alpha_i = alpha;
+      double beta_i = beta;
+      double head_tmp1, tail_tmp1;
+      double head_tmp2, tail_tmp2;
+      double head_tmp3, tail_tmp3;
+      double head_tmp4, tail_tmp4;
+      double result;
+      double head_sum1, tail_sum1;
+      double head_sum2, tail_sum2;
+      double head_prod, tail_prod;
+      float a_elem;
+      double x_elem;
+      double y_elem;
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1 = tail_sum1 = 0.0;
+	head_sum2 = tail_sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  {
+	    double dt = (double) a_elem;
+	    {
+	      /* Compute double_double = double * double. */
+	      double a1, a2, b1, b2, con;
+
+	      con = x_elem * split;
+	      a1 = con - x_elem;
+	      a1 = con - a1;
+	      a2 = x_elem - a1;
+	      con = dt * split;
+	      b1 = con - dt;
+	      b1 = con - b1;
+	      b2 = dt - b1;
+
+	      head_prod = x_elem * dt;
+	      tail_prod =
+		(((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
+	    }
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum1 + head_prod;
+	    bv = s1 - head_sum1;
+	    s2 = ((head_prod - bv) + (head_sum1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum1 + tail_prod;
+	    bv = t1 - tail_sum1;
+	    t2 = ((tail_prod - bv) + (tail_sum1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum1 = t1 + t2;
+	    tail_sum1 = t2 - (head_sum1 - t1);
+	  }
+	  x_elem = tail_x_i[jx];
+	  {
+	    double dt = (double) a_elem;
+	    {
+	      /* Compute double_double = double * double. */
+	      double a1, a2, b1, b2, con;
+
+	      con = x_elem * split;
+	      a1 = con - x_elem;
+	      a1 = con - a1;
+	      a2 = x_elem - a1;
+	      con = dt * split;
+	      b1 = con - dt;
+	      b1 = con - b1;
+	      b2 = dt - b1;
+
+	      head_prod = x_elem * dt;
+	      tail_prod =
+		(((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
+	    }
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum2 + head_prod;
+	    bv = s1 - head_sum2;
+	    s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum2 + tail_prod;
+	    bv = t1 - tail_sum2;
+	    t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum2 = t1 + t2;
+	    tail_sum2 = t2 - (head_sum2 - t1);
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  /* Compute double-double = double-double * double. */
+	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	  con = head_sum1 * split;
+	  a11 = con - head_sum1;
+	  a11 = con - a11;
+	  a21 = head_sum1 - a11;
+	  con = alpha_i * split;
+	  b1 = con - alpha_i;
+	  b1 = con - b1;
+	  b2 = alpha_i - b1;
+
+	  c11 = head_sum1 * alpha_i;
+	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	  c2 = tail_sum1 * alpha_i;
+	  t1 = c11 + c2;
+	  t2 = (c2 - (t1 - c11)) + c21;
+
+	  head_tmp1 = t1 + t2;
+	  tail_tmp1 = t2 - (head_tmp1 - t1);
+	}
+	{
+	  /* Compute double-double = double-double * double. */
+	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	  con = head_sum2 * split;
+	  a11 = con - head_sum2;
+	  a11 = con - a11;
+	  a21 = head_sum2 - a11;
+	  con = alpha_i * split;
+	  b1 = con - alpha_i;
+	  b1 = con - b1;
+	  b2 = alpha_i - b1;
+
+	  c11 = head_sum2 * alpha_i;
+	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	  c2 = tail_sum2 * alpha_i;
+	  t1 = c11 + c2;
+	  t2 = (c2 - (t1 - c11)) + c21;
+
+	  head_tmp2 = t1 + t2;
+	  tail_tmp2 = t2 - (head_tmp2 - t1);
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp1 + head_tmp2;
+	  bv = s1 - head_tmp1;
+	  s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp1 + tail_tmp2;
+	  bv = t1 - tail_tmp1;
+	  t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  head_tmp3 = t1 + t2;
+	  tail_tmp3 = t2 - (head_tmp3 - t1);
+	}
+	y_elem = y_i[iy];
+	{
+	  /* Compute double_double = double * double. */
+	  double a1, a2, b1, b2, con;
+
+	  con = beta_i * split;
+	  a1 = con - beta_i;
+	  a1 = con - a1;
+	  a2 = beta_i - a1;
+	  con = y_elem * split;
+	  b1 = con - y_elem;
+	  b1 = con - b1;
+	  b2 = y_elem - b1;
+
+	  head_tmp4 = beta_i * y_elem;
+	  tail_tmp4 = (((a1 * b1 - head_tmp4) + a1 * b2) + a2 * b1) + a2 * b2;
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp4 + head_tmp3;
+	  bv = s1 - head_tmp4;
+	  s2 = ((head_tmp3 - bv) + (head_tmp4 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp4 + tail_tmp3;
+	  bv = t1 - tail_tmp4;
+	  t2 = ((tail_tmp3 - bv) + (tail_tmp4 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  result = t1 + t2;
+	}
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_dgbmv2_s_d_x */
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s-f2c.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s-f2c.c
new file mode 100644
index 00000000..f8d78cd8
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_dgbmv2_s_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const float *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, double beta,
+		     double *y, int incy);
+
+
+extern void FC_FUNC_(blas_dgbmv2_s_s, BLAS_DGBMV2_S_S)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, double *alpha,
+   const float *a, int *lda, const float *head_x, const float *tail_x,
+   int *incx, double *beta, double *y, int *incy) {
+  BLAS_dgbmv2_s_s(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, *alpha, a, *lda, head_x, tail_x, *incx, *beta, y,
+		  *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s.c
new file mode 100644
index 00000000..fa29f394
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s.c
@@ -0,0 +1,254 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_dgbmv2_s_s(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, double alpha,
+		     const float *a, int lda, const float *head_x,
+		     const float *tail_x, int incx, double beta,
+		     double *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) double
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) double
+ *
+ * y            (input) const double*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_dgbmv2_s_s";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = y;
+  const float *a_i = a;
+  const float *head_x_i = head_x;
+  const float *tail_x_i = tail_x;
+  double alpha_i = alpha;
+  double beta_i = beta;
+  double tmp1;
+  double tmp2;
+  double tmp3;
+  double tmp4;
+  double result;
+  double sum1;
+  double sum2;
+  double prod;
+  float a_elem;
+  float x_elem;
+  double y_elem;
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i == 0.0) && (beta_i == 1.0))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+
+
+
+
+
+
+
+
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1 = 0.0;
+    sum2 = 0.0;
+    aij = ai;
+    jx = ix0;
+
+    for (j = ra - la; j >= 0; j--) {
+      x_elem = head_x_i[jx];
+      a_elem = a_i[aij];
+      prod = (double) x_elem *a_elem;
+      sum1 = sum1 + prod;
+      x_elem = tail_x_i[jx];
+      prod = (double) x_elem *a_elem;
+      sum2 = sum2 + prod;
+      aij += incaij;
+      jx += incx;
+    }
+
+
+    tmp1 = sum1 * alpha_i;
+    tmp2 = sum2 * alpha_i;
+    tmp3 = tmp1 + tmp2;
+    y_elem = y_i[iy];
+    tmp4 = beta_i * y_elem;
+    result = tmp4 + tmp3;
+    y_i[iy] = result;
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_dgbmv2_s_s */
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s_x-f2c.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s_x-f2c.c
new file mode 100644
index 00000000..cac401bf
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_dgbmv2_s_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const float *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_dgbmv2_s_s_x, BLAS_DGBMV2_S_S_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, double *alpha,
+   const float *a, int *lda, const float *head_x, const float *tail_x,
+   int *incx, double *beta, double *y, int *incy, int *prec) {
+  BLAS_dgbmv2_s_s_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, *alpha, a, *lda, head_x, tail_x, *incx, *beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s_x.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s_x.c
new file mode 100644
index 00000000..8eb8369e
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_s_s_x.c
@@ -0,0 +1,584 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_dgbmv2_s_s_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, double alpha,
+		       const float *a, int lda, const float *head_x,
+		       const float *tail_x, int incx, double beta,
+		       double *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) double
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) double
+ *
+ * y            (input) const double*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_dgbmv2_s_s_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = y;
+      const float *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      double alpha_i = alpha;
+      double beta_i = beta;
+      double tmp1;
+      double tmp2;
+      double tmp3;
+      double tmp4;
+      double result;
+      double sum1;
+      double sum2;
+      double prod;
+      float a_elem;
+      float x_elem;
+      double y_elem;
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = (double) x_elem *a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = (double) x_elem *a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	tmp1 = sum1 * alpha_i;
+	tmp2 = sum2 * alpha_i;
+	tmp3 = tmp1 + tmp2;
+	y_elem = y_i[iy];
+	tmp4 = beta_i * y_elem;
+	result = tmp4 + tmp3;
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = y;
+      const float *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      double alpha_i = alpha;
+      double beta_i = beta;
+      double head_tmp1, tail_tmp1;
+      double head_tmp2, tail_tmp2;
+      double head_tmp3, tail_tmp3;
+      double head_tmp4, tail_tmp4;
+      double result;
+      double head_sum1, tail_sum1;
+      double head_sum2, tail_sum2;
+      double head_prod, tail_prod;
+      float a_elem;
+      float x_elem;
+      double y_elem;
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1 = tail_sum1 = 0.0;
+	head_sum2 = tail_sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  head_prod = (double) x_elem *a_elem;
+	  tail_prod = 0.0;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum1 + head_prod;
+	    bv = s1 - head_sum1;
+	    s2 = ((head_prod - bv) + (head_sum1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum1 + tail_prod;
+	    bv = t1 - tail_sum1;
+	    t2 = ((tail_prod - bv) + (tail_sum1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum1 = t1 + t2;
+	    tail_sum1 = t2 - (head_sum1 - t1);
+	  }
+	  x_elem = tail_x_i[jx];
+	  head_prod = (double) x_elem *a_elem;
+	  tail_prod = 0.0;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum2 + head_prod;
+	    bv = s1 - head_sum2;
+	    s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum2 + tail_prod;
+	    bv = t1 - tail_sum2;
+	    t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum2 = t1 + t2;
+	    tail_sum2 = t2 - (head_sum2 - t1);
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  /* Compute double-double = double-double * double. */
+	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	  con = head_sum1 * split;
+	  a11 = con - head_sum1;
+	  a11 = con - a11;
+	  a21 = head_sum1 - a11;
+	  con = alpha_i * split;
+	  b1 = con - alpha_i;
+	  b1 = con - b1;
+	  b2 = alpha_i - b1;
+
+	  c11 = head_sum1 * alpha_i;
+	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	  c2 = tail_sum1 * alpha_i;
+	  t1 = c11 + c2;
+	  t2 = (c2 - (t1 - c11)) + c21;
+
+	  head_tmp1 = t1 + t2;
+	  tail_tmp1 = t2 - (head_tmp1 - t1);
+	}
+	{
+	  /* Compute double-double = double-double * double. */
+	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	  con = head_sum2 * split;
+	  a11 = con - head_sum2;
+	  a11 = con - a11;
+	  a21 = head_sum2 - a11;
+	  con = alpha_i * split;
+	  b1 = con - alpha_i;
+	  b1 = con - b1;
+	  b2 = alpha_i - b1;
+
+	  c11 = head_sum2 * alpha_i;
+	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	  c2 = tail_sum2 * alpha_i;
+	  t1 = c11 + c2;
+	  t2 = (c2 - (t1 - c11)) + c21;
+
+	  head_tmp2 = t1 + t2;
+	  tail_tmp2 = t2 - (head_tmp2 - t1);
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp1 + head_tmp2;
+	  bv = s1 - head_tmp1;
+	  s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp1 + tail_tmp2;
+	  bv = t1 - tail_tmp1;
+	  t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  head_tmp3 = t1 + t2;
+	  tail_tmp3 = t2 - (head_tmp3 - t1);
+	}
+	y_elem = y_i[iy];
+	{
+	  /* Compute double_double = double * double. */
+	  double a1, a2, b1, b2, con;
+
+	  con = beta_i * split;
+	  a1 = con - beta_i;
+	  a1 = con - a1;
+	  a2 = beta_i - a1;
+	  con = y_elem * split;
+	  b1 = con - y_elem;
+	  b1 = con - b1;
+	  b2 = y_elem - b1;
+
+	  head_tmp4 = beta_i * y_elem;
+	  tail_tmp4 = (((a1 * b1 - head_tmp4) + a1 * b2) + a2 * b1) + a2 * b2;
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp4 + head_tmp3;
+	  bv = s1 - head_tmp4;
+	  s2 = ((head_tmp3 - bv) + (head_tmp4 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp4 + tail_tmp3;
+	  bv = t1 - tail_tmp4;
+	  t2 = ((tail_tmp3 - bv) + (tail_tmp4 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  result = t1 + t2;
+	}
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_dgbmv2_s_s_x */
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_x-f2c.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_x-f2c.c
new file mode 100644
index 00000000..983c9585
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_dgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, double alpha,
+		   const double *a, int lda, const double *head_x,
+		   const double *tail_x, int incx, double beta,
+		   double *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_dgbmv2_x, BLAS_DGBMV2_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, double *alpha,
+   const double *a, int *lda, const double *head_x, const double *tail_x,
+   int *incx, double *beta, double *y, int *incy, int *prec) {
+  BLAS_dgbmv2_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		*ku, *alpha, a, *lda, head_x, tail_x, *incx, *beta, y, *incy,
+		(enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_dgbmv2_x.c b/XBLAS/src/gbmv2/BLAS_dgbmv2_x.c
new file mode 100644
index 00000000..e66c3edf
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_dgbmv2_x.c
@@ -0,0 +1,614 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_dgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, double alpha,
+		   const double *a, int lda, const double *head_x,
+		   const double *tail_x, int incx, double beta,
+		   double *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) double
+ *              
+ * AB           (input) double*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) double*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) double
+ *
+ * y            (input) const double*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_dgbmv2_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = y;
+      const double *a_i = a;
+      const double *head_x_i = head_x;
+      const double *tail_x_i = tail_x;
+      double alpha_i = alpha;
+      double beta_i = beta;
+      double tmp1;
+      double tmp2;
+      double tmp3;
+      double tmp4;
+      double result;
+      double sum1;
+      double sum2;
+      double prod;
+      double a_elem;
+      double x_elem;
+      double y_elem;
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = x_elem * a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = x_elem * a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	tmp1 = sum1 * alpha_i;
+	tmp2 = sum2 * alpha_i;
+	tmp3 = tmp1 + tmp2;
+	y_elem = y_i[iy];
+	tmp4 = beta_i * y_elem;
+	result = tmp4 + tmp3;
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = y;
+      const double *a_i = a;
+      const double *head_x_i = head_x;
+      const double *tail_x_i = tail_x;
+      double alpha_i = alpha;
+      double beta_i = beta;
+      double head_tmp1, tail_tmp1;
+      double head_tmp2, tail_tmp2;
+      double head_tmp3, tail_tmp3;
+      double head_tmp4, tail_tmp4;
+      double result;
+      double head_sum1, tail_sum1;
+      double head_sum2, tail_sum2;
+      double head_prod, tail_prod;
+      double a_elem;
+      double x_elem;
+      double y_elem;
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1 = tail_sum1 = 0.0;
+	head_sum2 = tail_sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = x_elem * split;
+	    a1 = con - x_elem;
+	    a1 = con - a1;
+	    a2 = x_elem - a1;
+	    con = a_elem * split;
+	    b1 = con - a_elem;
+	    b1 = con - b1;
+	    b2 = a_elem - b1;
+
+	    head_prod = x_elem * a_elem;
+	    tail_prod =
+	      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum1 + head_prod;
+	    bv = s1 - head_sum1;
+	    s2 = ((head_prod - bv) + (head_sum1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum1 + tail_prod;
+	    bv = t1 - tail_sum1;
+	    t2 = ((tail_prod - bv) + (tail_sum1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum1 = t1 + t2;
+	    tail_sum1 = t2 - (head_sum1 - t1);
+	  }
+	  x_elem = tail_x_i[jx];
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = x_elem * split;
+	    a1 = con - x_elem;
+	    a1 = con - a1;
+	    a2 = x_elem - a1;
+	    con = a_elem * split;
+	    b1 = con - a_elem;
+	    b1 = con - b1;
+	    b2 = a_elem - b1;
+
+	    head_prod = x_elem * a_elem;
+	    tail_prod =
+	      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum2 + head_prod;
+	    bv = s1 - head_sum2;
+	    s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum2 + tail_prod;
+	    bv = t1 - tail_sum2;
+	    t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum2 = t1 + t2;
+	    tail_sum2 = t2 - (head_sum2 - t1);
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  /* Compute double-double = double-double * double. */
+	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	  con = head_sum1 * split;
+	  a11 = con - head_sum1;
+	  a11 = con - a11;
+	  a21 = head_sum1 - a11;
+	  con = alpha_i * split;
+	  b1 = con - alpha_i;
+	  b1 = con - b1;
+	  b2 = alpha_i - b1;
+
+	  c11 = head_sum1 * alpha_i;
+	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	  c2 = tail_sum1 * alpha_i;
+	  t1 = c11 + c2;
+	  t2 = (c2 - (t1 - c11)) + c21;
+
+	  head_tmp1 = t1 + t2;
+	  tail_tmp1 = t2 - (head_tmp1 - t1);
+	}
+	{
+	  /* Compute double-double = double-double * double. */
+	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	  con = head_sum2 * split;
+	  a11 = con - head_sum2;
+	  a11 = con - a11;
+	  a21 = head_sum2 - a11;
+	  con = alpha_i * split;
+	  b1 = con - alpha_i;
+	  b1 = con - b1;
+	  b2 = alpha_i - b1;
+
+	  c11 = head_sum2 * alpha_i;
+	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	  c2 = tail_sum2 * alpha_i;
+	  t1 = c11 + c2;
+	  t2 = (c2 - (t1 - c11)) + c21;
+
+	  head_tmp2 = t1 + t2;
+	  tail_tmp2 = t2 - (head_tmp2 - t1);
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp1 + head_tmp2;
+	  bv = s1 - head_tmp1;
+	  s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp1 + tail_tmp2;
+	  bv = t1 - tail_tmp1;
+	  t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  head_tmp3 = t1 + t2;
+	  tail_tmp3 = t2 - (head_tmp3 - t1);
+	}
+	y_elem = y_i[iy];
+	{
+	  /* Compute double_double = double * double. */
+	  double a1, a2, b1, b2, con;
+
+	  con = beta_i * split;
+	  a1 = con - beta_i;
+	  a1 = con - a1;
+	  a2 = beta_i - a1;
+	  con = y_elem * split;
+	  b1 = con - y_elem;
+	  b1 = con - b1;
+	  b2 = y_elem - b1;
+
+	  head_tmp4 = beta_i * y_elem;
+	  tail_tmp4 = (((a1 * b1 - head_tmp4) + a1 * b2) + a2 * b1) + a2 * b2;
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp4 + head_tmp3;
+	  bv = s1 - head_tmp4;
+	  s2 = ((head_tmp3 - bv) + (head_tmp4 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp4 + tail_tmp3;
+	  bv = t1 - tail_tmp4;
+	  t2 = ((tail_tmp3 - bv) + (tail_tmp4 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  result = t1 + t2;
+	}
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_dgbmv2_x */
diff --git a/XBLAS/src/gbmv2/BLAS_sgbmv2_x-f2c.c b/XBLAS/src/gbmv2/BLAS_sgbmv2_x-f2c.c
new file mode 100644
index 00000000..6454c4f5
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_sgbmv2_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_sgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, float alpha,
+		   const float *a, int lda, const float *head_x,
+		   const float *tail_x, int incx, float beta,
+		   float *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_sgbmv2_x, BLAS_SGBMV2_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, float *alpha, const float *a,
+   int *lda, const float *head_x, const float *tail_x, int *incx, float *beta,
+   float *y, int *incy, int *prec) {
+  BLAS_sgbmv2_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		*ku, *alpha, a, *lda, head_x, tail_x, *incx, *beta, y, *incy,
+		(enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_sgbmv2_x.c b/XBLAS/src/gbmv2/BLAS_sgbmv2_x.c
new file mode 100644
index 00000000..c5397379
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_sgbmv2_x.c
@@ -0,0 +1,727 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_sgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, float alpha,
+		   const float *a, int lda, const float *head_x,
+		   const float *tail_x, int incx, float beta,
+		   float *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) float
+ *              
+ * AB           (input) float*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) float*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) float
+ *
+ * y            (input) const float*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_sgbmv2_x";
+
+  switch (prec) {
+  case blas_prec_single:{
+
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = y;
+      const float *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float alpha_i = alpha;
+      float beta_i = beta;
+      float tmp1;
+      float tmp2;
+      float tmp3;
+      float tmp4;
+      float result;
+      float sum1;
+      float sum2;
+      float prod;
+      float a_elem;
+      float x_elem;
+      float y_elem;
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = x_elem * a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = x_elem * a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	tmp1 = sum1 * alpha_i;
+	tmp2 = sum2 * alpha_i;
+	tmp3 = tmp1 + tmp2;
+	y_elem = y_i[iy];
+	tmp4 = beta_i * y_elem;
+	result = tmp4 + tmp3;
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+
+      break;
+    }
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = y;
+      const float *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float alpha_i = alpha;
+      float beta_i = beta;
+      double tmp1;
+      double tmp2;
+      double tmp3;
+      double tmp4;
+      float result;
+      double sum1;
+      double sum2;
+      double prod;
+      float a_elem;
+      float x_elem;
+      float y_elem;
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = (double) x_elem *a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = (double) x_elem *a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	tmp1 = sum1 * alpha_i;
+	tmp2 = sum2 * alpha_i;
+	tmp3 = tmp1 + tmp2;
+	y_elem = y_i[iy];
+	tmp4 = (double) beta_i *y_elem;
+	result = tmp4 + tmp3;
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      float *y_i = y;
+      const float *a_i = a;
+      const float *head_x_i = head_x;
+      const float *tail_x_i = tail_x;
+      float alpha_i = alpha;
+      float beta_i = beta;
+      double head_tmp1, tail_tmp1;
+      double head_tmp2, tail_tmp2;
+      double head_tmp3, tail_tmp3;
+      double head_tmp4, tail_tmp4;
+      float result;
+      double head_sum1, tail_sum1;
+      double head_sum2, tail_sum2;
+      double head_prod, tail_prod;
+      float a_elem;
+      float x_elem;
+      float y_elem;
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i == 0.0) && (beta_i == 1.0))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+
+
+
+
+
+
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1 = tail_sum1 = 0.0;
+	head_sum2 = tail_sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  head_prod = (double) x_elem *a_elem;
+	  tail_prod = 0.0;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum1 + head_prod;
+	    bv = s1 - head_sum1;
+	    s2 = ((head_prod - bv) + (head_sum1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum1 + tail_prod;
+	    bv = t1 - tail_sum1;
+	    t2 = ((tail_prod - bv) + (tail_sum1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum1 = t1 + t2;
+	    tail_sum1 = t2 - (head_sum1 - t1);
+	  }
+	  x_elem = tail_x_i[jx];
+	  head_prod = (double) x_elem *a_elem;
+	  tail_prod = 0.0;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum2 + head_prod;
+	    bv = s1 - head_sum2;
+	    s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum2 + tail_prod;
+	    bv = t1 - tail_sum2;
+	    t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum2 = t1 + t2;
+	    tail_sum2 = t2 - (head_sum2 - t1);
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  double dt = (double) alpha_i;
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum1 * split;
+	    a11 = con - head_sum1;
+	    a11 = con - a11;
+	    a21 = head_sum1 - a11;
+	    con = dt * split;
+	    b1 = con - dt;
+	    b1 = con - b1;
+	    b2 = dt - b1;
+
+	    c11 = head_sum1 * dt;
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum1 * dt;
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_tmp1 = t1 + t2;
+	    tail_tmp1 = t2 - (head_tmp1 - t1);
+	  }
+	}
+	{
+	  double dt = (double) alpha_i;
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum2 * split;
+	    a11 = con - head_sum2;
+	    a11 = con - a11;
+	    a21 = head_sum2 - a11;
+	    con = dt * split;
+	    b1 = con - dt;
+	    b1 = con - b1;
+	    b2 = dt - b1;
+
+	    c11 = head_sum2 * dt;
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum2 * dt;
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_tmp2 = t1 + t2;
+	    tail_tmp2 = t2 - (head_tmp2 - t1);
+	  }
+	}
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp1 + head_tmp2;
+	  bv = s1 - head_tmp1;
+	  s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp1 + tail_tmp2;
+	  bv = t1 - tail_tmp1;
+	  t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  head_tmp3 = t1 + t2;
+	  tail_tmp3 = t2 - (head_tmp3 - t1);
+	}
+	y_elem = y_i[iy];
+	head_tmp4 = (double) beta_i *y_elem;
+	tail_tmp4 = 0.0;
+	{
+	  /* Compute double-double = double-double + double-double. */
+	  double bv;
+	  double s1, s2, t1, t2;
+
+	  /* Add two hi words. */
+	  s1 = head_tmp4 + head_tmp3;
+	  bv = s1 - head_tmp4;
+	  s2 = ((head_tmp3 - bv) + (head_tmp4 - (s1 - bv)));
+
+	  /* Add two lo words. */
+	  t1 = tail_tmp4 + tail_tmp3;
+	  bv = t1 - tail_tmp4;
+	  t2 = ((tail_tmp3 - bv) + (tail_tmp4 - (t1 - bv)));
+
+	  s2 += t1;
+
+	  /* Renormalize (s1, s2)  to  (t1, s2) */
+	  t1 = s1 + s2;
+	  s2 = s2 - (t1 - s1);
+
+	  t2 += s2;
+
+	  /* Renormalize (t1, t2)  */
+	  result = t1 + t2;
+	}
+	y_i[iy] = result;
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_sgbmv2_x */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c-f2c.c
new file mode 100644
index 00000000..2165e22b
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_c_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_zgbmv2_c_c, BLAS_ZGBMV2_C_C)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const void *head_x, const void *tail_x, int *incx,
+   const void *beta, void *y, int *incy) {
+  BLAS_zgbmv2_c_c(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c.c
new file mode 100644
index 00000000..6da002ea
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c.c
@@ -0,0 +1,314 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_c_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_c_c";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = (double *) y;
+  const float *a_i = (float *) a;
+  const float *head_x_i = (float *) head_x;
+  const float *tail_x_i = (float *) tail_x;
+  double *alpha_i = (double *) alpha;
+  double *beta_i = (double *) beta;
+  double tmp1[2];
+  double tmp2[2];
+  double tmp3[2];
+  double tmp4[2];
+  double result[2];
+  double sum1[2];
+  double sum2[2];
+  double prod[2];
+  float a_elem[2];
+  float x_elem[2];
+  double y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+  incx *= 2;
+  incy *= 2;
+  incaij *= 2;
+  incai1 *= 2;
+  incai2 *= 2;
+  astart *= 2;
+  iy0 *= 2;
+  ix0 *= 2;
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1[0] = sum1[1] = 0.0;
+    sum2[0] = sum2[1] = 0.0;
+    aij = ai;
+    jx = ix0;
+    if (trans != blas_conj_trans) {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem[0] = head_x_i[jx];
+	x_elem[1] = head_x_i[jx + 1];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem[0] = tail_x_i[jx];
+	x_elem[1] = tail_x_i[jx + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+
+    } else {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem[0] = head_x_i[jx];
+	x_elem[1] = head_x_i[jx + 1];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	a_elem[1] = -a_elem[1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem[0] = tail_x_i[jx];
+	x_elem[1] = tail_x_i[jx + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+    }
+
+    {
+      tmp1[0] = (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+      tmp1[1] = (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+    }
+    {
+      tmp2[0] = (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+      tmp2[1] = (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+    }
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] =
+	(double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+      tmp4[1] =
+	(double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+    }
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_zgbmv2_c_c */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c_x-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c_x-f2c.c
new file mode 100644
index 00000000..368eec3b
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_c_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_zgbmv2_c_c_x, BLAS_ZGBMV2_C_C_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const void *head_x, const void *tail_x, int *incx,
+   const void *beta, void *y, int *incy, int *prec) {
+  BLAS_zgbmv2_c_c_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c_x.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c_x.c
new file mode 100644
index 00000000..108a3062
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_c_x.c
@@ -0,0 +1,1560 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_c_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_c_c_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const float *a_i = (float *) a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      double result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      float a_elem[2];
+      float x_elem[2];
+      double y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const float *a_i = (float *) a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      double result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      float a_elem[2];
+      float x_elem[2];
+      double y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      double head_e1, tail_e1;
+	      double d1;
+	      double d2;
+	      /* Real part */
+	      d1 = (double) x_elem[0] * a_elem[0];
+	      d2 = (double) -x_elem[1] * a_elem[1];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[0] = head_e1;
+	      tail_prod[0] = tail_e1;
+	      /* imaginary part */
+	      d1 = (double) x_elem[0] * a_elem[1];
+	      d2 = (double) x_elem[1] * a_elem[0];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[1] = head_e1;
+	      tail_prod[1] = tail_e1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      double head_e1, tail_e1;
+	      double d1;
+	      double d2;
+	      /* Real part */
+	      d1 = (double) x_elem[0] * a_elem[0];
+	      d2 = (double) -x_elem[1] * a_elem[1];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[0] = head_e1;
+	      tail_prod[0] = tail_e1;
+	      /* imaginary part */
+	      d1 = (double) x_elem[0] * a_elem[1];
+	      d2 = (double) x_elem[1] * a_elem[0];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[1] = head_e1;
+	      tail_prod[1] = tail_e1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      double head_e1, tail_e1;
+	      double d1;
+	      double d2;
+	      /* Real part */
+	      d1 = (double) x_elem[0] * a_elem[0];
+	      d2 = (double) -x_elem[1] * a_elem[1];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[0] = head_e1;
+	      tail_prod[0] = tail_e1;
+	      /* imaginary part */
+	      d1 = (double) x_elem[0] * a_elem[1];
+	      d2 = (double) x_elem[1] * a_elem[0];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[1] = head_e1;
+	      tail_prod[1] = tail_e1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      double head_e1, tail_e1;
+	      double d1;
+	      double d2;
+	      /* Real part */
+	      d1 = (double) x_elem[0] * a_elem[0];
+	      d2 = (double) -x_elem[1] * a_elem[1];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[0] = head_e1;
+	      tail_prod[0] = tail_e1;
+	      /* imaginary part */
+	      d1 = (double) x_elem[0] * a_elem[1];
+	      d2 = (double) x_elem[1] * a_elem[0];
+	      {
+		/* Compute double-double = double + double. */
+		double e, t1, t2;
+
+		/* Knuth trick. */
+		t1 = d1 + d2;
+		e = t1 - d1;
+		t2 = ((d2 - e) + (d1 - (t1 - e)));
+
+		/* The result is t1 + t2, after normalization. */
+		head_e1 = t1 + t2;
+		tail_e1 = t2 - (head_e1 - t1);
+	      }
+	      head_prod[1] = head_e1;
+	      tail_prod[1] = tail_e1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum1[0];
+	  tail_a0 = tail_sum1[0];
+	  head_a1 = head_sum1[1];
+	  tail_a1 = tail_sum1[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[0] = head_t1;
+	  tail_tmp1[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[1] = head_t1;
+	  tail_tmp1[1] = tail_t1;
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum2[0];
+	  tail_a0 = tail_sum2[0];
+	  head_a1 = head_sum2[1];
+	  tail_a1 = tail_sum2[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[0] = head_t1;
+	  tail_tmp2[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[1] = head_t1;
+	  tail_tmp2[1] = tail_t1;
+	}
+
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  /* Compute complex-extra = complex-double * complex-double. */
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  /* Real part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[0] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[1] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[0] = head_t1;
+	  tail_tmp4[0] = tail_t1;
+	  /* Imaginary part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[1] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[0] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[1] = head_t1;
+	  tail_tmp4[1] = tail_t1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_zgbmv2_c_c_x */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z-f2c.c
new file mode 100644
index 00000000..df7b7cc8
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_c_z(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_zgbmv2_c_z, BLAS_ZGBMV2_C_Z)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const void *head_x, const void *tail_x, int *incx,
+   const void *beta, void *y, int *incy) {
+  BLAS_zgbmv2_c_z(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z.c
new file mode 100644
index 00000000..bb4ad742
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z.c
@@ -0,0 +1,314 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_c_z(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_c_z";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = (double *) y;
+  const float *a_i = (float *) a;
+  const double *head_x_i = (double *) head_x;
+  const double *tail_x_i = (double *) tail_x;
+  double *alpha_i = (double *) alpha;
+  double *beta_i = (double *) beta;
+  double tmp1[2];
+  double tmp2[2];
+  double tmp3[2];
+  double tmp4[2];
+  double result[2];
+  double sum1[2];
+  double sum2[2];
+  double prod[2];
+  float a_elem[2];
+  double x_elem[2];
+  double y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+  incx *= 2;
+  incy *= 2;
+  incaij *= 2;
+  incai1 *= 2;
+  incai2 *= 2;
+  astart *= 2;
+  iy0 *= 2;
+  ix0 *= 2;
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1[0] = sum1[1] = 0.0;
+    sum2[0] = sum2[1] = 0.0;
+    aij = ai;
+    jx = ix0;
+    if (trans != blas_conj_trans) {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem[0] = head_x_i[jx];
+	x_elem[1] = head_x_i[jx + 1];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem[0] = tail_x_i[jx];
+	x_elem[1] = tail_x_i[jx + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+
+    } else {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem[0] = head_x_i[jx];
+	x_elem[1] = head_x_i[jx + 1];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	a_elem[1] = -a_elem[1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem[0] = tail_x_i[jx];
+	x_elem[1] = tail_x_i[jx + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+    }
+
+    {
+      tmp1[0] = (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+      tmp1[1] = (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+    }
+    {
+      tmp2[0] = (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+      tmp2[1] = (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+    }
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] =
+	(double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+      tmp4[1] =
+	(double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+    }
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_zgbmv2_c_z */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z_x-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z_x-f2c.c
new file mode 100644
index 00000000..b29d074d
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_c_z_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_zgbmv2_c_z_x, BLAS_ZGBMV2_C_Z_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const void *head_x, const void *tail_x, int *incx,
+   const void *beta, void *y, int *incy, int *prec) {
+  BLAS_zgbmv2_c_z_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z_x.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z_x.c
new file mode 100644
index 00000000..4772c683
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_c_z_x.c
@@ -0,0 +1,1956 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_c_z_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_c_z_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const float *a_i = (float *) a;
+      const double *head_x_i = (double *) head_x;
+      const double *tail_x_i = (double *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      double result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      float a_elem[2];
+      double x_elem[2];
+      double y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const float *a_i = (float *) a;
+      const double *head_x_i = (double *) head_x;
+      const double *tail_x_i = (double *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      double result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      float a_elem[2];
+      double x_elem[2];
+      double y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      double cd[2];
+	      cd[0] = (double) a_elem[0];
+	      cd[1] = (double) a_elem[1];
+	      {
+		/* Compute complex-extra = complex-double * complex-double. */
+		double head_t1, tail_t1;
+		double head_t2, tail_t2;
+		/* Real part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[0] * split;
+		  a1 = con - x_elem[0];
+		  a1 = con - a1;
+		  a2 = x_elem[0] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = x_elem[0] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[1] * split;
+		  a1 = con - x_elem[1];
+		  a1 = con - a1;
+		  a2 = x_elem[1] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = x_elem[1] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		head_t2 = -head_t2;
+		tail_t2 = -tail_t2;
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[0] = head_t1;
+		tail_prod[0] = tail_t1;
+		/* Imaginary part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[1] * split;
+		  a1 = con - x_elem[1];
+		  a1 = con - a1;
+		  a2 = x_elem[1] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = x_elem[1] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[0] * split;
+		  a1 = con - x_elem[0];
+		  a1 = con - a1;
+		  a2 = x_elem[0] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = x_elem[0] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[1] = head_t1;
+		tail_prod[1] = tail_t1;
+	      }
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      double cd[2];
+	      cd[0] = (double) a_elem[0];
+	      cd[1] = (double) a_elem[1];
+	      {
+		/* Compute complex-extra = complex-double * complex-double. */
+		double head_t1, tail_t1;
+		double head_t2, tail_t2;
+		/* Real part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[0] * split;
+		  a1 = con - x_elem[0];
+		  a1 = con - a1;
+		  a2 = x_elem[0] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = x_elem[0] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[1] * split;
+		  a1 = con - x_elem[1];
+		  a1 = con - a1;
+		  a2 = x_elem[1] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = x_elem[1] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		head_t2 = -head_t2;
+		tail_t2 = -tail_t2;
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[0] = head_t1;
+		tail_prod[0] = tail_t1;
+		/* Imaginary part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[1] * split;
+		  a1 = con - x_elem[1];
+		  a1 = con - a1;
+		  a2 = x_elem[1] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = x_elem[1] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[0] * split;
+		  a1 = con - x_elem[0];
+		  a1 = con - a1;
+		  a2 = x_elem[0] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = x_elem[0] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[1] = head_t1;
+		tail_prod[1] = tail_t1;
+	      }
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      double cd[2];
+	      cd[0] = (double) a_elem[0];
+	      cd[1] = (double) a_elem[1];
+	      {
+		/* Compute complex-extra = complex-double * complex-double. */
+		double head_t1, tail_t1;
+		double head_t2, tail_t2;
+		/* Real part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[0] * split;
+		  a1 = con - x_elem[0];
+		  a1 = con - a1;
+		  a2 = x_elem[0] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = x_elem[0] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[1] * split;
+		  a1 = con - x_elem[1];
+		  a1 = con - a1;
+		  a2 = x_elem[1] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = x_elem[1] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		head_t2 = -head_t2;
+		tail_t2 = -tail_t2;
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[0] = head_t1;
+		tail_prod[0] = tail_t1;
+		/* Imaginary part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[1] * split;
+		  a1 = con - x_elem[1];
+		  a1 = con - a1;
+		  a2 = x_elem[1] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = x_elem[1] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[0] * split;
+		  a1 = con - x_elem[0];
+		  a1 = con - a1;
+		  a2 = x_elem[0] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = x_elem[0] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[1] = head_t1;
+		tail_prod[1] = tail_t1;
+	      }
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      double cd[2];
+	      cd[0] = (double) a_elem[0];
+	      cd[1] = (double) a_elem[1];
+	      {
+		/* Compute complex-extra = complex-double * complex-double. */
+		double head_t1, tail_t1;
+		double head_t2, tail_t2;
+		/* Real part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[0] * split;
+		  a1 = con - x_elem[0];
+		  a1 = con - a1;
+		  a2 = x_elem[0] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = x_elem[0] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[1] * split;
+		  a1 = con - x_elem[1];
+		  a1 = con - a1;
+		  a2 = x_elem[1] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = x_elem[1] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		head_t2 = -head_t2;
+		tail_t2 = -tail_t2;
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[0] = head_t1;
+		tail_prod[0] = tail_t1;
+		/* Imaginary part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[1] * split;
+		  a1 = con - x_elem[1];
+		  a1 = con - a1;
+		  a2 = x_elem[1] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = x_elem[1] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = x_elem[0] * split;
+		  a1 = con - x_elem[0];
+		  a1 = con - a1;
+		  a2 = x_elem[0] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = x_elem[0] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[1] = head_t1;
+		tail_prod[1] = tail_t1;
+	      }
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum1[0];
+	  tail_a0 = tail_sum1[0];
+	  head_a1 = head_sum1[1];
+	  tail_a1 = tail_sum1[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[0] = head_t1;
+	  tail_tmp1[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[1] = head_t1;
+	  tail_tmp1[1] = tail_t1;
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum2[0];
+	  tail_a0 = tail_sum2[0];
+	  head_a1 = head_sum2[1];
+	  tail_a1 = tail_sum2[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[0] = head_t1;
+	  tail_tmp2[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[1] = head_t1;
+	  tail_tmp2[1] = tail_t1;
+	}
+
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  /* Compute complex-extra = complex-double * complex-double. */
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  /* Real part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[0] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[1] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[0] = head_t1;
+	  tail_tmp4[0] = tail_t1;
+	  /* Imaginary part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[1] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[0] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[1] = head_t1;
+	  tail_tmp4[1] = tail_t1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_zgbmv2_c_z_x */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d-f2c.c
new file mode 100644
index 00000000..cb517912
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_d_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const double *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_zgbmv2_d_d, BLAS_ZGBMV2_D_D)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const double *a, int *lda, const double *head_x, const double *tail_x,
+   int *incx, const void *beta, void *y, int *incy) {
+  BLAS_zgbmv2_d_d(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d.c
new file mode 100644
index 00000000..6c59b71d
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d.c
@@ -0,0 +1,270 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_d_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const double *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) double*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) double*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_d_d";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = (double *) y;
+  const double *a_i = a;
+  const double *head_x_i = head_x;
+  const double *tail_x_i = tail_x;
+  double *alpha_i = (double *) alpha;
+  double *beta_i = (double *) beta;
+  double tmp1[2];
+  double tmp2[2];
+  double tmp3[2];
+  double tmp4[2];
+  double result[2];
+  double sum1;
+  double sum2;
+  double prod;
+  double a_elem;
+  double x_elem;
+  double y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+
+  incy *= 2;
+
+
+
+
+  iy0 *= 2;
+
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1 = 0.0;
+    sum2 = 0.0;
+    aij = ai;
+    jx = ix0;
+
+    for (j = ra - la; j >= 0; j--) {
+      x_elem = head_x_i[jx];
+      a_elem = a_i[aij];
+      prod = x_elem * a_elem;
+      sum1 = sum1 + prod;
+      x_elem = tail_x_i[jx];
+      prod = x_elem * a_elem;
+      sum2 = sum2 + prod;
+      aij += incaij;
+      jx += incx;
+    }
+
+
+    {
+      tmp1[0] = alpha_i[0] * sum1;
+      tmp1[1] = alpha_i[1] * sum1;
+    }
+    {
+      tmp2[0] = alpha_i[0] * sum2;
+      tmp2[1] = alpha_i[1] * sum2;
+    }
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] =
+	(double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+      tmp4[1] =
+	(double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+    }
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_zgbmv2_d_d */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d_x-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d_x-f2c.c
new file mode 100644
index 00000000..44031fb6
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_d_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const double *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_zgbmv2_d_d_x, BLAS_ZGBMV2_D_D_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const double *a, int *lda, const double *head_x, const double *tail_x,
+   int *incx, const void *beta, void *y, int *incy, int *prec) {
+  BLAS_zgbmv2_d_d_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d_x.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d_x.c
new file mode 100644
index 00000000..99329c0e
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_d_x.c
@@ -0,0 +1,896 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_d_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const double *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) double*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) double*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_d_d_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = a;
+      const double *head_x_i = head_x;
+      const double *tail_x_i = tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      double result[2];
+      double sum1;
+      double sum2;
+      double prod;
+      double a_elem;
+      double x_elem;
+      double y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1 = 0.0;
+	sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  prod = x_elem * a_elem;
+	  sum1 = sum1 + prod;
+	  x_elem = tail_x_i[jx];
+	  prod = x_elem * a_elem;
+	  sum2 = sum2 + prod;
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  tmp1[0] = alpha_i[0] * sum1;
+	  tmp1[1] = alpha_i[1] * sum1;
+	}
+	{
+	  tmp2[0] = alpha_i[0] * sum2;
+	  tmp2[1] = alpha_i[1] * sum2;
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = a;
+      const double *head_x_i = head_x;
+      const double *tail_x_i = tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      double result[2];
+      double head_sum1, tail_sum1;
+      double head_sum2, tail_sum2;
+      double head_prod, tail_prod;
+      double a_elem;
+      double x_elem;
+      double y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1 = tail_sum1 = 0.0;
+	head_sum2 = tail_sum2 = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem = head_x_i[jx];
+	  a_elem = a_i[aij];
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = x_elem * split;
+	    a1 = con - x_elem;
+	    a1 = con - a1;
+	    a2 = x_elem - a1;
+	    con = a_elem * split;
+	    b1 = con - a_elem;
+	    b1 = con - b1;
+	    b2 = a_elem - b1;
+
+	    head_prod = x_elem * a_elem;
+	    tail_prod =
+	      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum1 + head_prod;
+	    bv = s1 - head_sum1;
+	    s2 = ((head_prod - bv) + (head_sum1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum1 + tail_prod;
+	    bv = t1 - tail_sum1;
+	    t2 = ((tail_prod - bv) + (tail_sum1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum1 = t1 + t2;
+	    tail_sum1 = t2 - (head_sum1 - t1);
+	  }
+	  x_elem = tail_x_i[jx];
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = x_elem * split;
+	    a1 = con - x_elem;
+	    a1 = con - a1;
+	    a2 = x_elem - a1;
+	    con = a_elem * split;
+	    b1 = con - a_elem;
+	    b1 = con - b1;
+	    b2 = a_elem - b1;
+
+	    head_prod = x_elem * a_elem;
+	    tail_prod =
+	      (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_sum2 + head_prod;
+	    bv = s1 - head_sum2;
+	    s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_sum2 + tail_prod;
+	    bv = t1 - tail_sum2;
+	    t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_sum2 = t1 + t2;
+	    tail_sum2 = t2 - (head_sum2 - t1);
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  /* Compute complex-extra = complex-double * real. */
+	  double head_t, tail_t;
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum1 * split;
+	    a11 = con - head_sum1;
+	    a11 = con - a11;
+	    a21 = head_sum1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_sum1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp1[0] = head_t;
+	  tail_tmp1[0] = tail_t;
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum1 * split;
+	    a11 = con - head_sum1;
+	    a11 = con - a11;
+	    a21 = head_sum1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_sum1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp1[1] = head_t;
+	  tail_tmp1[1] = tail_t;
+	}
+	{
+	  /* Compute complex-extra = complex-double * real. */
+	  double head_t, tail_t;
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum2 * split;
+	    a11 = con - head_sum2;
+	    a11 = con - a11;
+	    a21 = head_sum2 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_sum2 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum2 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp2[0] = head_t;
+	  tail_tmp2[0] = tail_t;
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_sum2 * split;
+	    a11 = con - head_sum2;
+	    a11 = con - a11;
+	    a21 = head_sum2 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_sum2 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_sum2 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp2[1] = head_t;
+	  tail_tmp2[1] = tail_t;
+	}
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  /* Compute complex-extra = complex-double * complex-double. */
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  /* Real part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[0] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[1] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[0] = head_t1;
+	  tail_tmp4[0] = tail_t1;
+	  /* Imaginary part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[1] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[0] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[1] = head_t1;
+	  tail_tmp4[1] = tail_t1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_zgbmv2_d_d_x */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z-f2c.c
new file mode 100644
index 00000000..b034cc27
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_d_z(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const double *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_zgbmv2_d_z, BLAS_ZGBMV2_D_Z)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const double *a, int *lda, const void *head_x, const void *tail_x,
+   int *incx, const void *beta, void *y, int *incy) {
+  BLAS_zgbmv2_d_z(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z.c
new file mode 100644
index 00000000..6066a47f
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z.c
@@ -0,0 +1,280 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_d_z(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const double *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) double*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_d_z";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = (double *) y;
+  const double *a_i = a;
+  const double *head_x_i = (double *) head_x;
+  const double *tail_x_i = (double *) tail_x;
+  double *alpha_i = (double *) alpha;
+  double *beta_i = (double *) beta;
+  double tmp1[2];
+  double tmp2[2];
+  double tmp3[2];
+  double tmp4[2];
+  double result[2];
+  double sum1[2];
+  double sum2[2];
+  double prod[2];
+  double a_elem;
+  double x_elem[2];
+  double y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+  incx *= 2;
+  incy *= 2;
+
+
+
+
+  iy0 *= 2;
+  ix0 *= 2;
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1[0] = sum1[1] = 0.0;
+    sum2[0] = sum2[1] = 0.0;
+    aij = ai;
+    jx = ix0;
+
+    for (j = ra - la; j >= 0; j--) {
+      x_elem[0] = head_x_i[jx];
+      x_elem[1] = head_x_i[jx + 1];
+      a_elem = a_i[aij];
+      {
+	prod[0] = x_elem[0] * a_elem;
+	prod[1] = x_elem[1] * a_elem;
+      }
+      sum1[0] = sum1[0] + prod[0];
+      sum1[1] = sum1[1] + prod[1];
+      x_elem[0] = tail_x_i[jx];
+      x_elem[1] = tail_x_i[jx + 1];
+      {
+	prod[0] = x_elem[0] * a_elem;
+	prod[1] = x_elem[1] * a_elem;
+      }
+      sum2[0] = sum2[0] + prod[0];
+      sum2[1] = sum2[1] + prod[1];
+      aij += incaij;
+      jx += incx;
+    }
+
+
+    {
+      tmp1[0] = (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+      tmp1[1] = (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+    }
+    {
+      tmp2[0] = (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+      tmp2[1] = (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+    }
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] =
+	(double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+      tmp4[1] =
+	(double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+    }
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_zgbmv2_d_z */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z_x-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z_x-f2c.c
new file mode 100644
index 00000000..f9bb4d1c
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_d_z_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const double *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_zgbmv2_d_z_x, BLAS_ZGBMV2_D_Z_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const double *a, int *lda, const void *head_x, const void *tail_x,
+   int *incx, const void *beta, void *y, int *incy, int *prec) {
+  BLAS_zgbmv2_d_z_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z_x.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z_x.c
new file mode 100644
index 00000000..996fa4dc
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_d_z_x.c
@@ -0,0 +1,1274 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_d_z_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const double *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) double*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_d_z_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = a;
+      const double *head_x_i = (double *) head_x;
+      const double *tail_x_i = (double *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      double result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      double a_elem;
+      double x_elem[2];
+      double y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem[0] = head_x_i[jx];
+	  x_elem[1] = head_x_i[jx + 1];
+	  a_elem = a_i[aij];
+	  {
+	    prod[0] = x_elem[0] * a_elem;
+	    prod[1] = x_elem[1] * a_elem;
+	  }
+	  sum1[0] = sum1[0] + prod[0];
+	  sum1[1] = sum1[1] + prod[1];
+	  x_elem[0] = tail_x_i[jx];
+	  x_elem[1] = tail_x_i[jx + 1];
+	  {
+	    prod[0] = x_elem[0] * a_elem;
+	    prod[1] = x_elem[1] * a_elem;
+	  }
+	  sum2[0] = sum2[0] + prod[0];
+	  sum2[1] = sum2[1] + prod[1];
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = a;
+      const double *head_x_i = (double *) head_x;
+      const double *tail_x_i = (double *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      double result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      double a_elem;
+      double x_elem[2];
+      double y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+
+
+
+
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+
+	for (j = ra - la; j >= 0; j--) {
+	  x_elem[0] = head_x_i[jx];
+	  x_elem[1] = head_x_i[jx + 1];
+	  a_elem = a_i[aij];
+	  {
+	    /* Compute complex-extra = complex-double * real. */
+	    double head_t, tail_t;
+	    {
+	      /* Compute double_double = double * double. */
+	      double a1, a2, b1, b2, con;
+
+	      con = a_elem * split;
+	      a1 = con - a_elem;
+	      a1 = con - a1;
+	      a2 = a_elem - a1;
+	      con = x_elem[0] * split;
+	      b1 = con - x_elem[0];
+	      b1 = con - b1;
+	      b2 = x_elem[0] - b1;
+
+	      head_t = a_elem * x_elem[0];
+	      tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	    }
+	    head_prod[0] = head_t;
+	    tail_prod[0] = tail_t;
+	    {
+	      /* Compute double_double = double * double. */
+	      double a1, a2, b1, b2, con;
+
+	      con = a_elem * split;
+	      a1 = con - a_elem;
+	      a1 = con - a1;
+	      a2 = a_elem - a1;
+	      con = x_elem[1] * split;
+	      b1 = con - x_elem[1];
+	      b1 = con - b1;
+	      b2 = x_elem[1] - b1;
+
+	      head_t = a_elem * x_elem[1];
+	      tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	    }
+	    head_prod[1] = head_t;
+	    tail_prod[1] = tail_t;
+	  }
+	  {
+	    double head_t, tail_t;
+	    double head_a, tail_a;
+	    double head_b, tail_b;
+	    /* Real part */
+	    head_a = head_sum1[0];
+	    tail_a = tail_sum1[0];
+	    head_b = head_prod[0];
+	    tail_b = tail_prod[0];
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_a + head_b;
+	      bv = s1 - head_a;
+	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_a + tail_b;
+	      bv = t1 - tail_a;
+	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t = t1 + t2;
+	      tail_t = t2 - (head_t - t1);
+	    }
+	    head_sum1[0] = head_t;
+	    tail_sum1[0] = tail_t;
+	    /* Imaginary part */
+	    head_a = head_sum1[1];
+	    tail_a = tail_sum1[1];
+	    head_b = head_prod[1];
+	    tail_b = tail_prod[1];
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_a + head_b;
+	      bv = s1 - head_a;
+	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_a + tail_b;
+	      bv = t1 - tail_a;
+	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t = t1 + t2;
+	      tail_t = t2 - (head_t - t1);
+	    }
+	    head_sum1[1] = head_t;
+	    tail_sum1[1] = tail_t;
+	  }
+	  x_elem[0] = tail_x_i[jx];
+	  x_elem[1] = tail_x_i[jx + 1];
+	  {
+	    /* Compute complex-extra = complex-double * real. */
+	    double head_t, tail_t;
+	    {
+	      /* Compute double_double = double * double. */
+	      double a1, a2, b1, b2, con;
+
+	      con = a_elem * split;
+	      a1 = con - a_elem;
+	      a1 = con - a1;
+	      a2 = a_elem - a1;
+	      con = x_elem[0] * split;
+	      b1 = con - x_elem[0];
+	      b1 = con - b1;
+	      b2 = x_elem[0] - b1;
+
+	      head_t = a_elem * x_elem[0];
+	      tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	    }
+	    head_prod[0] = head_t;
+	    tail_prod[0] = tail_t;
+	    {
+	      /* Compute double_double = double * double. */
+	      double a1, a2, b1, b2, con;
+
+	      con = a_elem * split;
+	      a1 = con - a_elem;
+	      a1 = con - a1;
+	      a2 = a_elem - a1;
+	      con = x_elem[1] * split;
+	      b1 = con - x_elem[1];
+	      b1 = con - b1;
+	      b2 = x_elem[1] - b1;
+
+	      head_t = a_elem * x_elem[1];
+	      tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	    }
+	    head_prod[1] = head_t;
+	    tail_prod[1] = tail_t;
+	  }
+	  {
+	    double head_t, tail_t;
+	    double head_a, tail_a;
+	    double head_b, tail_b;
+	    /* Real part */
+	    head_a = head_sum2[0];
+	    tail_a = tail_sum2[0];
+	    head_b = head_prod[0];
+	    tail_b = tail_prod[0];
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_a + head_b;
+	      bv = s1 - head_a;
+	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_a + tail_b;
+	      bv = t1 - tail_a;
+	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t = t1 + t2;
+	      tail_t = t2 - (head_t - t1);
+	    }
+	    head_sum2[0] = head_t;
+	    tail_sum2[0] = tail_t;
+	    /* Imaginary part */
+	    head_a = head_sum2[1];
+	    tail_a = tail_sum2[1];
+	    head_b = head_prod[1];
+	    tail_b = tail_prod[1];
+	    {
+	      /* Compute double-double = double-double + double-double. */
+	      double bv;
+	      double s1, s2, t1, t2;
+
+	      /* Add two hi words. */
+	      s1 = head_a + head_b;
+	      bv = s1 - head_a;
+	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	      /* Add two lo words. */
+	      t1 = tail_a + tail_b;
+	      bv = t1 - tail_a;
+	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	      s2 += t1;
+
+	      /* Renormalize (s1, s2)  to  (t1, s2) */
+	      t1 = s1 + s2;
+	      s2 = s2 - (t1 - s1);
+
+	      t2 += s2;
+
+	      /* Renormalize (t1, t2)  */
+	      head_t = t1 + t2;
+	      tail_t = t2 - (head_t - t1);
+	    }
+	    head_sum2[1] = head_t;
+	    tail_sum2[1] = tail_t;
+	  }
+	  aij += incaij;
+	  jx += incx;
+	}
+
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum1[0];
+	  tail_a0 = tail_sum1[0];
+	  head_a1 = head_sum1[1];
+	  tail_a1 = tail_sum1[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[0] = head_t1;
+	  tail_tmp1[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[1] = head_t1;
+	  tail_tmp1[1] = tail_t1;
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum2[0];
+	  tail_a0 = tail_sum2[0];
+	  head_a1 = head_sum2[1];
+	  tail_a1 = tail_sum2[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[0] = head_t1;
+	  tail_tmp2[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[1] = head_t1;
+	  tail_tmp2[1] = tail_t1;
+	}
+
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  /* Compute complex-extra = complex-double * complex-double. */
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  /* Real part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[0] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[1] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[0] = head_t1;
+	  tail_tmp4[0] = tail_t1;
+	  /* Imaginary part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[1] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[0] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[1] = head_t1;
+	  tail_tmp4[1] = tail_t1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_zgbmv2_d_z_x */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_x-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_x-f2c.c
new file mode 100644
index 00000000..17af6ea7
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, const void *alpha,
+		   const void *a, int lda, const void *head_x,
+		   const void *tail_x, int incx, const void *beta,
+		   void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_zgbmv2_x, BLAS_ZGBMV2_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const void *head_x, const void *tail_x, int *incx,
+   const void *beta, void *y, int *incy, int *prec) {
+  BLAS_zgbmv2_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		*ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy,
+		(enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_x.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_x.c
new file mode 100644
index 00000000..98cf4658
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_x.c
@@ -0,0 +1,1936 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
+		   int m, int n, int kl, int ku, const void *alpha,
+		   const void *a, int lda, const void *head_x,
+		   const void *tail_x, int incx, const void *beta,
+		   void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = (double *) a;
+      const double *head_x_i = (double *) head_x;
+      const double *tail_x_i = (double *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      double result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      double a_elem[2];
+      double x_elem[2];
+      double y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = (double *) a;
+      const double *head_x_i = (double *) head_x;
+      const double *tail_x_i = (double *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      double result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      double a_elem[2];
+      double x_elem[2];
+      double y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      /* Compute complex-extra = complex-double * complex-double. */
+	      double head_t1, tail_t1;
+	      double head_t2, tail_t2;
+	      /* Real part */
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[0] * split;
+		a1 = con - x_elem[0];
+		a1 = con - a1;
+		a2 = x_elem[0] - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t1 = x_elem[0] * a_elem[0];
+		tail_t1 =
+		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[1] * split;
+		a1 = con - x_elem[1];
+		a1 = con - a1;
+		a2 = x_elem[1] - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t2 = x_elem[1] * a_elem[1];
+		tail_t2 =
+		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_t2 = -head_t2;
+	      tail_t2 = -tail_t2;
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_t1 + head_t2;
+		bv = s1 - head_t1;
+		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_t1 + tail_t2;
+		bv = t1 - tail_t1;
+		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t1 = t1 + t2;
+		tail_t1 = t2 - (head_t1 - t1);
+	      }
+	      head_prod[0] = head_t1;
+	      tail_prod[0] = tail_t1;
+	      /* Imaginary part */
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[1] * split;
+		a1 = con - x_elem[1];
+		a1 = con - a1;
+		a2 = x_elem[1] - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t1 = x_elem[1] * a_elem[0];
+		tail_t1 =
+		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[0] * split;
+		a1 = con - x_elem[0];
+		a1 = con - a1;
+		a2 = x_elem[0] - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t2 = x_elem[0] * a_elem[1];
+		tail_t2 =
+		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_t1 + head_t2;
+		bv = s1 - head_t1;
+		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_t1 + tail_t2;
+		bv = t1 - tail_t1;
+		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t1 = t1 + t2;
+		tail_t1 = t2 - (head_t1 - t1);
+	      }
+	      head_prod[1] = head_t1;
+	      tail_prod[1] = tail_t1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      /* Compute complex-extra = complex-double * complex-double. */
+	      double head_t1, tail_t1;
+	      double head_t2, tail_t2;
+	      /* Real part */
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[0] * split;
+		a1 = con - x_elem[0];
+		a1 = con - a1;
+		a2 = x_elem[0] - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t1 = x_elem[0] * a_elem[0];
+		tail_t1 =
+		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[1] * split;
+		a1 = con - x_elem[1];
+		a1 = con - a1;
+		a2 = x_elem[1] - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t2 = x_elem[1] * a_elem[1];
+		tail_t2 =
+		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_t2 = -head_t2;
+	      tail_t2 = -tail_t2;
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_t1 + head_t2;
+		bv = s1 - head_t1;
+		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_t1 + tail_t2;
+		bv = t1 - tail_t1;
+		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t1 = t1 + t2;
+		tail_t1 = t2 - (head_t1 - t1);
+	      }
+	      head_prod[0] = head_t1;
+	      tail_prod[0] = tail_t1;
+	      /* Imaginary part */
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[1] * split;
+		a1 = con - x_elem[1];
+		a1 = con - a1;
+		a2 = x_elem[1] - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t1 = x_elem[1] * a_elem[0];
+		tail_t1 =
+		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[0] * split;
+		a1 = con - x_elem[0];
+		a1 = con - a1;
+		a2 = x_elem[0] - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t2 = x_elem[0] * a_elem[1];
+		tail_t2 =
+		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_t1 + head_t2;
+		bv = s1 - head_t1;
+		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_t1 + tail_t2;
+		bv = t1 - tail_t1;
+		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t1 = t1 + t2;
+		tail_t1 = t2 - (head_t1 - t1);
+	      }
+	      head_prod[1] = head_t1;
+	      tail_prod[1] = tail_t1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      /* Compute complex-extra = complex-double * complex-double. */
+	      double head_t1, tail_t1;
+	      double head_t2, tail_t2;
+	      /* Real part */
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[0] * split;
+		a1 = con - x_elem[0];
+		a1 = con - a1;
+		a2 = x_elem[0] - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t1 = x_elem[0] * a_elem[0];
+		tail_t1 =
+		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[1] * split;
+		a1 = con - x_elem[1];
+		a1 = con - a1;
+		a2 = x_elem[1] - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t2 = x_elem[1] * a_elem[1];
+		tail_t2 =
+		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_t2 = -head_t2;
+	      tail_t2 = -tail_t2;
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_t1 + head_t2;
+		bv = s1 - head_t1;
+		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_t1 + tail_t2;
+		bv = t1 - tail_t1;
+		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t1 = t1 + t2;
+		tail_t1 = t2 - (head_t1 - t1);
+	      }
+	      head_prod[0] = head_t1;
+	      tail_prod[0] = tail_t1;
+	      /* Imaginary part */
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[1] * split;
+		a1 = con - x_elem[1];
+		a1 = con - a1;
+		a2 = x_elem[1] - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t1 = x_elem[1] * a_elem[0];
+		tail_t1 =
+		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[0] * split;
+		a1 = con - x_elem[0];
+		a1 = con - a1;
+		a2 = x_elem[0] - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t2 = x_elem[0] * a_elem[1];
+		tail_t2 =
+		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_t1 + head_t2;
+		bv = s1 - head_t1;
+		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_t1 + tail_t2;
+		bv = t1 - tail_t1;
+		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t1 = t1 + t2;
+		tail_t1 = t2 - (head_t1 - t1);
+	      }
+	      head_prod[1] = head_t1;
+	      tail_prod[1] = tail_t1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      /* Compute complex-extra = complex-double * complex-double. */
+	      double head_t1, tail_t1;
+	      double head_t2, tail_t2;
+	      /* Real part */
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[0] * split;
+		a1 = con - x_elem[0];
+		a1 = con - a1;
+		a2 = x_elem[0] - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t1 = x_elem[0] * a_elem[0];
+		tail_t1 =
+		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[1] * split;
+		a1 = con - x_elem[1];
+		a1 = con - a1;
+		a2 = x_elem[1] - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t2 = x_elem[1] * a_elem[1];
+		tail_t2 =
+		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_t2 = -head_t2;
+	      tail_t2 = -tail_t2;
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_t1 + head_t2;
+		bv = s1 - head_t1;
+		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_t1 + tail_t2;
+		bv = t1 - tail_t1;
+		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t1 = t1 + t2;
+		tail_t1 = t2 - (head_t1 - t1);
+	      }
+	      head_prod[0] = head_t1;
+	      tail_prod[0] = tail_t1;
+	      /* Imaginary part */
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[1] * split;
+		a1 = con - x_elem[1];
+		a1 = con - a1;
+		a2 = x_elem[1] - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t1 = x_elem[1] * a_elem[0];
+		tail_t1 =
+		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem[0] * split;
+		a1 = con - x_elem[0];
+		a1 = con - a1;
+		a2 = x_elem[0] - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t2 = x_elem[0] * a_elem[1];
+		tail_t2 =
+		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_t1 + head_t2;
+		bv = s1 - head_t1;
+		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_t1 + tail_t2;
+		bv = t1 - tail_t1;
+		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t1 = t1 + t2;
+		tail_t1 = t2 - (head_t1 - t1);
+	      }
+	      head_prod[1] = head_t1;
+	      tail_prod[1] = tail_t1;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum1[0];
+	  tail_a0 = tail_sum1[0];
+	  head_a1 = head_sum1[1];
+	  tail_a1 = tail_sum1[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[0] = head_t1;
+	  tail_tmp1[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[1] = head_t1;
+	  tail_tmp1[1] = tail_t1;
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum2[0];
+	  tail_a0 = tail_sum2[0];
+	  head_a1 = head_sum2[1];
+	  tail_a1 = tail_sum2[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[0] = head_t1;
+	  tail_tmp2[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[1] = head_t1;
+	  tail_tmp2[1] = tail_t1;
+	}
+
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  /* Compute complex-extra = complex-double * complex-double. */
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  /* Real part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[0] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[1] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[0] = head_t1;
+	  tail_tmp4[0] = tail_t1;
+	  /* Imaginary part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[1] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[0] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[1] = head_t1;
+	  tail_tmp4[1] = tail_t1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_zgbmv2_x */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c-f2c.c
new file mode 100644
index 00000000..e51d9551
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_z_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_zgbmv2_z_c, BLAS_ZGBMV2_Z_C)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const void *head_x, const void *tail_x, int *incx,
+   const void *beta, void *y, int *incy) {
+  BLAS_zgbmv2_z_c(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c.c
new file mode 100644
index 00000000..6581d257
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c.c
@@ -0,0 +1,314 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_z_c(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const void *head_x,
+		     const void *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_z_c";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = (double *) y;
+  const double *a_i = (double *) a;
+  const float *head_x_i = (float *) head_x;
+  const float *tail_x_i = (float *) tail_x;
+  double *alpha_i = (double *) alpha;
+  double *beta_i = (double *) beta;
+  double tmp1[2];
+  double tmp2[2];
+  double tmp3[2];
+  double tmp4[2];
+  double result[2];
+  double sum1[2];
+  double sum2[2];
+  double prod[2];
+  double a_elem[2];
+  float x_elem[2];
+  double y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+  incx *= 2;
+  incy *= 2;
+  incaij *= 2;
+  incai1 *= 2;
+  incai2 *= 2;
+  astart *= 2;
+  iy0 *= 2;
+  ix0 *= 2;
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1[0] = sum1[1] = 0.0;
+    sum2[0] = sum2[1] = 0.0;
+    aij = ai;
+    jx = ix0;
+    if (trans != blas_conj_trans) {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem[0] = head_x_i[jx];
+	x_elem[1] = head_x_i[jx + 1];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem[0] = tail_x_i[jx];
+	x_elem[1] = tail_x_i[jx + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+
+    } else {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem[0] = head_x_i[jx];
+	x_elem[1] = head_x_i[jx + 1];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	a_elem[1] = -a_elem[1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem[0] = tail_x_i[jx];
+	x_elem[1] = tail_x_i[jx + 1];
+	{
+	  prod[0] =
+	    (double) x_elem[0] * a_elem[0] - (double) x_elem[1] * a_elem[1];
+	  prod[1] =
+	    (double) x_elem[0] * a_elem[1] + (double) x_elem[1] * a_elem[0];
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+    }
+
+    {
+      tmp1[0] = (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+      tmp1[1] = (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+    }
+    {
+      tmp2[0] = (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+      tmp2[1] = (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+    }
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] =
+	(double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+      tmp4[1] =
+	(double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+    }
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_zgbmv2_z_c */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c_x-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c_x-f2c.c
new file mode 100644
index 00000000..45dbc677
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_z_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_zgbmv2_z_c_x, BLAS_ZGBMV2_Z_C_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const void *head_x, const void *tail_x, int *incx,
+   const void *beta, void *y, int *incy, int *prec) {
+  BLAS_zgbmv2_z_c_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c_x.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c_x.c
new file mode 100644
index 00000000..5b0f1841
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_c_x.c
@@ -0,0 +1,1956 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_z_c_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const void *head_x,
+		       const void *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) void*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_z_c_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = (double *) a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      double result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      double a_elem[2];
+      float x_elem[2];
+      double y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      prod[0] =
+		(double) x_elem[0] * a_elem[0] -
+		(double) x_elem[1] * a_elem[1];
+	      prod[1] =
+		(double) x_elem[0] * a_elem[1] +
+		(double) x_elem[1] * a_elem[0];
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = (double *) a;
+      const float *head_x_i = (float *) head_x;
+      const float *tail_x_i = (float *) tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      double result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      double a_elem[2];
+      float x_elem[2];
+      double y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+      incx *= 2;
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+      ix0 *= 2;
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      double cd[2];
+	      cd[0] = (double) x_elem[0];
+	      cd[1] = (double) x_elem[1];
+	      {
+		/* Compute complex-extra = complex-double * complex-double. */
+		double head_t1, tail_t1;
+		double head_t2, tail_t2;
+		/* Real part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[0] * split;
+		  a1 = con - a_elem[0];
+		  a1 = con - a1;
+		  a2 = a_elem[0] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = a_elem[0] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[1] * split;
+		  a1 = con - a_elem[1];
+		  a1 = con - a1;
+		  a2 = a_elem[1] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = a_elem[1] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		head_t2 = -head_t2;
+		tail_t2 = -tail_t2;
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[0] = head_t1;
+		tail_prod[0] = tail_t1;
+		/* Imaginary part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[1] * split;
+		  a1 = con - a_elem[1];
+		  a1 = con - a1;
+		  a2 = a_elem[1] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = a_elem[1] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[0] * split;
+		  a1 = con - a_elem[0];
+		  a1 = con - a1;
+		  a2 = a_elem[0] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = a_elem[0] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[1] = head_t1;
+		tail_prod[1] = tail_t1;
+	      }
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      double cd[2];
+	      cd[0] = (double) x_elem[0];
+	      cd[1] = (double) x_elem[1];
+	      {
+		/* Compute complex-extra = complex-double * complex-double. */
+		double head_t1, tail_t1;
+		double head_t2, tail_t2;
+		/* Real part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[0] * split;
+		  a1 = con - a_elem[0];
+		  a1 = con - a1;
+		  a2 = a_elem[0] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = a_elem[0] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[1] * split;
+		  a1 = con - a_elem[1];
+		  a1 = con - a1;
+		  a2 = a_elem[1] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = a_elem[1] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		head_t2 = -head_t2;
+		tail_t2 = -tail_t2;
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[0] = head_t1;
+		tail_prod[0] = tail_t1;
+		/* Imaginary part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[1] * split;
+		  a1 = con - a_elem[1];
+		  a1 = con - a1;
+		  a2 = a_elem[1] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = a_elem[1] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[0] * split;
+		  a1 = con - a_elem[0];
+		  a1 = con - a1;
+		  a2 = a_elem[0] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = a_elem[0] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[1] = head_t1;
+		tail_prod[1] = tail_t1;
+	      }
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem[0] = head_x_i[jx];
+	    x_elem[1] = head_x_i[jx + 1];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      double cd[2];
+	      cd[0] = (double) x_elem[0];
+	      cd[1] = (double) x_elem[1];
+	      {
+		/* Compute complex-extra = complex-double * complex-double. */
+		double head_t1, tail_t1;
+		double head_t2, tail_t2;
+		/* Real part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[0] * split;
+		  a1 = con - a_elem[0];
+		  a1 = con - a1;
+		  a2 = a_elem[0] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = a_elem[0] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[1] * split;
+		  a1 = con - a_elem[1];
+		  a1 = con - a1;
+		  a2 = a_elem[1] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = a_elem[1] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		head_t2 = -head_t2;
+		tail_t2 = -tail_t2;
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[0] = head_t1;
+		tail_prod[0] = tail_t1;
+		/* Imaginary part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[1] * split;
+		  a1 = con - a_elem[1];
+		  a1 = con - a1;
+		  a2 = a_elem[1] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = a_elem[1] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[0] * split;
+		  a1 = con - a_elem[0];
+		  a1 = con - a1;
+		  a2 = a_elem[0] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = a_elem[0] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[1] = head_t1;
+		tail_prod[1] = tail_t1;
+	      }
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem[0] = tail_x_i[jx];
+	    x_elem[1] = tail_x_i[jx + 1];
+	    {
+	      double cd[2];
+	      cd[0] = (double) x_elem[0];
+	      cd[1] = (double) x_elem[1];
+	      {
+		/* Compute complex-extra = complex-double * complex-double. */
+		double head_t1, tail_t1;
+		double head_t2, tail_t2;
+		/* Real part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[0] * split;
+		  a1 = con - a_elem[0];
+		  a1 = con - a1;
+		  a2 = a_elem[0] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = a_elem[0] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[1] * split;
+		  a1 = con - a_elem[1];
+		  a1 = con - a1;
+		  a2 = a_elem[1] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = a_elem[1] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		head_t2 = -head_t2;
+		tail_t2 = -tail_t2;
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[0] = head_t1;
+		tail_prod[0] = tail_t1;
+		/* Imaginary part */
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[1] * split;
+		  a1 = con - a_elem[1];
+		  a1 = con - a1;
+		  a2 = a_elem[1] - a1;
+		  con = cd[0] * split;
+		  b1 = con - cd[0];
+		  b1 = con - b1;
+		  b2 = cd[0] - b1;
+
+		  head_t1 = a_elem[1] * cd[0];
+		  tail_t1 =
+		    (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double_double = double * double. */
+		  double a1, a2, b1, b2, con;
+
+		  con = a_elem[0] * split;
+		  a1 = con - a_elem[0];
+		  a1 = con - a1;
+		  a2 = a_elem[0] - a1;
+		  con = cd[1] * split;
+		  b1 = con - cd[1];
+		  b1 = con - b1;
+		  b2 = cd[1] - b1;
+
+		  head_t2 = a_elem[0] * cd[1];
+		  tail_t2 =
+		    (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+		}
+		{
+		  /* Compute double-double = double-double + double-double. */
+		  double bv;
+		  double s1, s2, t1, t2;
+
+		  /* Add two hi words. */
+		  s1 = head_t1 + head_t2;
+		  bv = s1 - head_t1;
+		  s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+		  /* Add two lo words. */
+		  t1 = tail_t1 + tail_t2;
+		  bv = t1 - tail_t1;
+		  t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+		  s2 += t1;
+
+		  /* Renormalize (s1, s2)  to  (t1, s2) */
+		  t1 = s1 + s2;
+		  s2 = s2 - (t1 - s1);
+
+		  t2 += s2;
+
+		  /* Renormalize (t1, t2)  */
+		  head_t1 = t1 + t2;
+		  tail_t1 = t2 - (head_t1 - t1);
+		}
+		head_prod[1] = head_t1;
+		tail_prod[1] = tail_t1;
+	      }
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum1[0];
+	  tail_a0 = tail_sum1[0];
+	  head_a1 = head_sum1[1];
+	  tail_a1 = tail_sum1[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[0] = head_t1;
+	  tail_tmp1[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[1] = head_t1;
+	  tail_tmp1[1] = tail_t1;
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum2[0];
+	  tail_a0 = tail_sum2[0];
+	  head_a1 = head_sum2[1];
+	  tail_a1 = tail_sum2[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[0] = head_t1;
+	  tail_tmp2[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[1] = head_t1;
+	  tail_tmp2[1] = tail_t1;
+	}
+
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  /* Compute complex-extra = complex-double * complex-double. */
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  /* Real part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[0] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[1] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[0] = head_t1;
+	  tail_tmp4[0] = tail_t1;
+	  /* Imaginary part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[1] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[0] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[1] = head_t1;
+	  tail_tmp4[1] = tail_t1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_zgbmv2_z_c_x */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d-f2c.c
new file mode 100644
index 00000000..9d8451e7
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d-f2c.c
@@ -0,0 +1,18 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_z_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, const void *beta,
+		     void *y, int incy);
+
+
+extern void FC_FUNC_(blas_zgbmv2_z_d, BLAS_ZGBMV2_Z_D)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const double *head_x, const double *tail_x,
+   int *incx, const void *beta, void *y, int *incy) {
+  BLAS_zgbmv2_z_d(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		  *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y, *incy);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d.c
new file mode 100644
index 00000000..3a9e537f
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d.c
@@ -0,0 +1,302 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_z_d(enum blas_order_type order, enum blas_trans_type trans,
+		     int m, int n, int kl, int ku, const void *alpha,
+		     const void *a, int lda, const double *head_x,
+		     const double *tail_x, int incx, const void *beta,
+		     void *y, int incy)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) double*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_z_d";
+
+  int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+  int incaij, aij, incai1, incai2, astart, ai;
+  double *y_i = (double *) y;
+  const double *a_i = (double *) a;
+  const double *head_x_i = head_x;
+  const double *tail_x_i = tail_x;
+  double *alpha_i = (double *) alpha;
+  double *beta_i = (double *) beta;
+  double tmp1[2];
+  double tmp2[2];
+  double tmp3[2];
+  double tmp4[2];
+  double result[2];
+  double sum1[2];
+  double sum2[2];
+  double prod[2];
+  double a_elem[2];
+  double x_elem;
+  double y_elem[2];
+
+
+  if (order != blas_colmajor && order != blas_rowmajor)
+    BLAS_error(routine_name, -1, order, NULL);
+  if (trans != blas_no_trans &&
+      trans != blas_trans && trans != blas_conj_trans) {
+    BLAS_error(routine_name, -2, trans, NULL);
+  }
+  if (m < 0)
+    BLAS_error(routine_name, -3, m, NULL);
+  if (n < 0)
+    BLAS_error(routine_name, -4, n, NULL);
+  if (kl < 0 || kl >= m)
+    BLAS_error(routine_name, -5, kl, NULL);
+  if (ku < 0 || ku >= n)
+    BLAS_error(routine_name, -6, ku, NULL);
+  if (lda < kl + ku + 1)
+    BLAS_error(routine_name, -9, lda, NULL);
+  if (incx == 0)
+    BLAS_error(routine_name, -12, incx, NULL);
+  if (incy == 0)
+    BLAS_error(routine_name, -15, incy, NULL);
+
+  if (m == 0 || n == 0)
+    return;
+  if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+      && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+    return;
+
+  if (trans == blas_no_trans) {
+    lenx = n;
+    leny = m;
+  } else {
+    lenx = m;
+    leny = n;
+  }
+
+  ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+  /* if alpha = 0, return y = y*beta */
+  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+    astart = ku;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+    astart = ku;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+    astart = kl;
+    incai1 = lda - 1;
+    incai2 = lda;
+    incaij = 1;
+    lbound = kl;
+    rbound = n - ku - 1;
+    ra = ku;
+  } else {			/* rowmajor and blas_trans */
+    astart = kl;
+    incai1 = 1;
+    incai2 = lda;
+    incaij = lda - 1;
+    lbound = ku;
+    rbound = m - kl - 1;
+    ra = kl;
+  }
+
+  incy *= 2;
+  incaij *= 2;
+  incai1 *= 2;
+  incai2 *= 2;
+  astart *= 2;
+  iy0 *= 2;
+
+
+  la = 0;
+  ai = astart;
+  iy = iy0;
+  for (i = 0; i < leny; i++) {
+    sum1[0] = sum1[1] = 0.0;
+    sum2[0] = sum2[1] = 0.0;
+    aij = ai;
+    jx = ix0;
+    if (trans != blas_conj_trans) {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem = head_x_i[jx];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	{
+	  prod[0] = a_elem[0] * x_elem;
+	  prod[1] = a_elem[1] * x_elem;
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem = tail_x_i[jx];
+	{
+	  prod[0] = a_elem[0] * x_elem;
+	  prod[1] = a_elem[1] * x_elem;
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+
+    } else {
+      for (j = ra - la; j >= 0; j--) {
+	x_elem = head_x_i[jx];
+	a_elem[0] = a_i[aij];
+	a_elem[1] = a_i[aij + 1];
+	a_elem[1] = -a_elem[1];
+	{
+	  prod[0] = a_elem[0] * x_elem;
+	  prod[1] = a_elem[1] * x_elem;
+	}
+	sum1[0] = sum1[0] + prod[0];
+	sum1[1] = sum1[1] + prod[1];
+	x_elem = tail_x_i[jx];
+	{
+	  prod[0] = a_elem[0] * x_elem;
+	  prod[1] = a_elem[1] * x_elem;
+	}
+	sum2[0] = sum2[0] + prod[0];
+	sum2[1] = sum2[1] + prod[1];
+	aij += incaij;
+	jx += incx;
+      }
+    }
+
+    {
+      tmp1[0] = (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+      tmp1[1] = (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+    }
+    {
+      tmp2[0] = (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+      tmp2[1] = (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+    }
+    tmp3[0] = tmp1[0] + tmp2[0];
+    tmp3[1] = tmp1[1] + tmp2[1];
+    y_elem[0] = y_i[iy];
+    y_elem[1] = y_i[iy + 1];
+    {
+      tmp4[0] =
+	(double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+      tmp4[1] =
+	(double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+    }
+    result[0] = tmp4[0] + tmp3[0];
+    result[1] = tmp4[1] + tmp3[1];
+    y_i[iy] = result[0];
+    y_i[iy + 1] = result[1];
+
+    iy += incy;
+    if (i >= lbound) {
+      ix0 += incx;
+      ai += incai2;
+      la++;
+    } else {
+      ai += incai1;
+    }
+    if (i < rbound) {
+      ra++;
+    }
+  }
+
+
+
+}				/* end BLAS_zgbmv2_z_d */
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d_x-f2c.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d_x-f2c.c
new file mode 100644
index 00000000..fad23ee6
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d_x-f2c.c
@@ -0,0 +1,19 @@
+
+#include "f2c-bridge.h"
+#include "blas_enum.h"
+void BLAS_zgbmv2_z_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec);
+
+
+extern void FC_FUNC_(blas_zgbmv2_z_d_x, BLAS_ZGBMV2_Z_D_X)
+ 
+  (int *trans, int *m, int *n, int *kl, int *ku, const void *alpha,
+   const void *a, int *lda, const double *head_x, const double *tail_x,
+   int *incx, const void *beta, void *y, int *incy, int *prec) {
+  BLAS_zgbmv2_z_d_x(blas_colmajor, (enum blas_trans_type) *trans, *m, *n, *kl,
+		    *ku, alpha, a, *lda, head_x, tail_x, *incx, beta, y,
+		    *incy, (enum blas_prec_type) *prec);
+}
diff --git a/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d_x.c b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d_x.c
new file mode 100644
index 00000000..df3e530f
--- /dev/null
+++ b/XBLAS/src/gbmv2/BLAS_zgbmv2_z_d_x.c
@@ -0,0 +1,1532 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+void BLAS_zgbmv2_z_d_x(enum blas_order_type order, enum blas_trans_type trans,
+		       int m, int n, int kl, int ku, const void *alpha,
+		       const void *a, int lda, const double *head_x,
+		       const double *tail_x, int incx, const void *beta,
+		       void *y, int incy, enum blas_prec_type prec)
+
+/*           
+ * Purpose
+ * =======
+ *
+ * This routines computes the matrix product:
+ *
+ *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
+ * 
+ * where 
+ *
+ *  A is a m x n banded matrix
+ *  x is a n x 1 vector
+ *  y is a m x 1 vector
+ *  alpha and beta are scalars 
+ *   
+ * Arguments
+ * =========
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans        (input) blas_trans_type
+ *              Transpose of AB; no trans, 
+ *              trans, or conjugate trans
+ *
+ * m            (input) int
+ *              Dimension of AB
+ *
+ * n            (input) int
+ *              Dimension of AB and the length of vector x and z
+ *
+ * kl           (input) int 
+ *              Number of lower diagnols of AB
+ *
+ * ku           (input) int
+ *              Number of upper diagnols of AB
+ *
+ * alpha        (input) const void*
+ *              
+ * AB           (input) void*
+ *
+ * lda          (input) int 
+ *              Leading dimension of AB
+ *              lda >= ku + kl + 1
+ *
+ * head_x
+ * tail_x       (input) double*
+ * 
+ * incx         (input) int
+ *              The stride for vector x.
+ *
+ * beta         (input) const void*
+ *
+ * y            (input) const void*
+ *
+ * incy         (input) int
+ *              The stride for vector y.
+ * 
+ * prec   (input) enum blas_prec_type
+ *        Specifies the internal precision to be used.
+ *        = blas_prec_single: single precision.
+ *        = blas_prec_double: double precision.
+ *        = blas_prec_extra : anything at least 1.5 times as accurate
+ *                            than double, and wider than 80-bits.
+ *                            We use double-double in our implementation.
+ *
+ *
+ * LOCAL VARIABLES 
+ * ===============
+ * 
+ *  As an example, these variables are described on the mxn, column 
+ *  major, banded matrix described in section 2.2.3 of the specification  
+ *
+ *  astart      indexes first element in A where computation begins
+ *
+ *  incai1      indexes first element in row where row is less than lbound
+ * 
+ *  incai2      indexes first element in row where row exceeds lbound
+ *   
+ *  lbound      denotes the number of rows before  first element shifts 
+ *
+ *  rbound      denotes the columns where there is blank space
+ *   
+ *  ra          index of the rightmost element for a given row
+ *  
+ *  la          index of leftmost  elements for a given row
+ *
+ *  ra - la     width of a row
+ *
+ *                        rbound 
+ *            la   ra    ____|_____ 
+ *             |    |   |          |
+ *         |  a00  a01   *    *   *
+ * lbound -|  a10  a11  a12   *   *
+ *         |  a20  a21  a22  a23  *
+ *             *   a31  a32  a33 a34
+ *             *    *   a42  a43 a44
+ *
+ *  Varations on order and transpose have been implemented by modifying these
+ *  local variables. 
+ *
+ */
+{
+  static const char routine_name[] = "BLAS_zgbmv2_z_d_x";
+
+  switch (prec) {
+  case blas_prec_single:
+  case blas_prec_double:
+  case blas_prec_indigenous:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = (double *) a;
+      const double *head_x_i = head_x;
+      const double *tail_x_i = tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double tmp1[2];
+      double tmp2[2];
+      double tmp3[2];
+      double tmp4[2];
+      double result[2];
+      double sum1[2];
+      double sum2[2];
+      double prod[2];
+      double a_elem[2];
+      double x_elem;
+      double y_elem[2];
+
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	sum1[0] = sum1[1] = 0.0;
+	sum2[0] = sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      prod[0] = a_elem[0] * x_elem;
+	      prod[1] = a_elem[1] * x_elem;
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem = tail_x_i[jx];
+	    {
+	      prod[0] = a_elem[0] * x_elem;
+	      prod[1] = a_elem[1] * x_elem;
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      prod[0] = a_elem[0] * x_elem;
+	      prod[1] = a_elem[1] * x_elem;
+	    }
+	    sum1[0] = sum1[0] + prod[0];
+	    sum1[1] = sum1[1] + prod[1];
+	    x_elem = tail_x_i[jx];
+	    {
+	      prod[0] = a_elem[0] * x_elem;
+	      prod[1] = a_elem[1] * x_elem;
+	    }
+	    sum2[0] = sum2[0] + prod[0];
+	    sum2[1] = sum2[1] + prod[1];
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  tmp1[0] =
+	    (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
+	  tmp1[1] =
+	    (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
+	}
+	{
+	  tmp2[0] =
+	    (double) sum2[0] * alpha_i[0] - (double) sum2[1] * alpha_i[1];
+	  tmp2[1] =
+	    (double) sum2[0] * alpha_i[1] + (double) sum2[1] * alpha_i[0];
+	}
+	tmp3[0] = tmp1[0] + tmp2[0];
+	tmp3[1] = tmp1[1] + tmp2[1];
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  tmp4[0] =
+	    (double) beta_i[0] * y_elem[0] - (double) beta_i[1] * y_elem[1];
+	  tmp4[1] =
+	    (double) beta_i[0] * y_elem[1] + (double) beta_i[1] * y_elem[0];
+	}
+	result[0] = tmp4[0] + tmp3[0];
+	result[1] = tmp4[1] + tmp3[1];
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+
+    }
+    break;
+  case blas_prec_extra:
+    {
+      int iy0, iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny;
+      int incaij, aij, incai1, incai2, astart, ai;
+      double *y_i = (double *) y;
+      const double *a_i = (double *) a;
+      const double *head_x_i = head_x;
+      const double *tail_x_i = tail_x;
+      double *alpha_i = (double *) alpha;
+      double *beta_i = (double *) beta;
+      double head_tmp1[2], tail_tmp1[2];
+      double head_tmp2[2], tail_tmp2[2];
+      double head_tmp3[2], tail_tmp3[2];
+      double head_tmp4[2], tail_tmp4[2];
+      double result[2];
+      double head_sum1[2], tail_sum1[2];
+      double head_sum2[2], tail_sum2[2];
+      double head_prod[2], tail_prod[2];
+      double a_elem[2];
+      double x_elem;
+      double y_elem[2];
+      FPU_FIX_DECL;
+
+      if (order != blas_colmajor && order != blas_rowmajor)
+	BLAS_error(routine_name, -1, order, NULL);
+      if (trans != blas_no_trans &&
+	  trans != blas_trans && trans != blas_conj_trans) {
+	BLAS_error(routine_name, -2, trans, NULL);
+      }
+      if (m < 0)
+	BLAS_error(routine_name, -3, m, NULL);
+      if (n < 0)
+	BLAS_error(routine_name, -4, n, NULL);
+      if (kl < 0 || kl >= m)
+	BLAS_error(routine_name, -5, kl, NULL);
+      if (ku < 0 || ku >= n)
+	BLAS_error(routine_name, -6, ku, NULL);
+      if (lda < kl + ku + 1)
+	BLAS_error(routine_name, -9, lda, NULL);
+      if (incx == 0)
+	BLAS_error(routine_name, -12, incx, NULL);
+      if (incy == 0)
+	BLAS_error(routine_name, -15, incy, NULL);
+
+      if (m == 0 || n == 0)
+	return;
+      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
+	  && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
+	return;
+
+      if (trans == blas_no_trans) {
+	lenx = n;
+	leny = m;
+      } else {
+	lenx = m;
+	leny = n;
+      }
+
+      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
+      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+      FPU_FIX_START;
+
+      /* if alpha = 0, return y = y*beta */
+      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
+	astart = ku;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
+	astart = ku;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
+	astart = kl;
+	incai1 = lda - 1;
+	incai2 = lda;
+	incaij = 1;
+	lbound = kl;
+	rbound = n - ku - 1;
+	ra = ku;
+      } else {			/* rowmajor and blas_trans */
+	astart = kl;
+	incai1 = 1;
+	incai2 = lda;
+	incaij = lda - 1;
+	lbound = ku;
+	rbound = m - kl - 1;
+	ra = kl;
+      }
+
+      incy *= 2;
+      incaij *= 2;
+      incai1 *= 2;
+      incai2 *= 2;
+      astart *= 2;
+      iy0 *= 2;
+
+
+      la = 0;
+      ai = astart;
+      iy = iy0;
+      for (i = 0; i < leny; i++) {
+	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
+	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
+	aij = ai;
+	jx = ix0;
+	if (trans != blas_conj_trans) {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    {
+	      /* Compute complex-extra = complex-double * real. */
+	      double head_t, tail_t;
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem * split;
+		a1 = con - x_elem;
+		a1 = con - a1;
+		a2 = x_elem - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t = x_elem * a_elem[0];
+		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_prod[0] = head_t;
+	      tail_prod[0] = tail_t;
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem * split;
+		a1 = con - x_elem;
+		a1 = con - a1;
+		a2 = x_elem - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t = x_elem * a_elem[1];
+		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_prod[1] = head_t;
+	      tail_prod[1] = tail_t;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem = tail_x_i[jx];
+	    {
+	      /* Compute complex-extra = complex-double * real. */
+	      double head_t, tail_t;
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem * split;
+		a1 = con - x_elem;
+		a1 = con - a1;
+		a2 = x_elem - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t = x_elem * a_elem[0];
+		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_prod[0] = head_t;
+	      tail_prod[0] = tail_t;
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem * split;
+		a1 = con - x_elem;
+		a1 = con - a1;
+		a2 = x_elem - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t = x_elem * a_elem[1];
+		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_prod[1] = head_t;
+	      tail_prod[1] = tail_t;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+
+	} else {
+	  for (j = ra - la; j >= 0; j--) {
+	    x_elem = head_x_i[jx];
+	    a_elem[0] = a_i[aij];
+	    a_elem[1] = a_i[aij + 1];
+	    a_elem[1] = -a_elem[1];
+	    {
+	      /* Compute complex-extra = complex-double * real. */
+	      double head_t, tail_t;
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem * split;
+		a1 = con - x_elem;
+		a1 = con - a1;
+		a2 = x_elem - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t = x_elem * a_elem[0];
+		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_prod[0] = head_t;
+	      tail_prod[0] = tail_t;
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem * split;
+		a1 = con - x_elem;
+		a1 = con - a1;
+		a2 = x_elem - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t = x_elem * a_elem[1];
+		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_prod[1] = head_t;
+	      tail_prod[1] = tail_t;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum1[0];
+	      tail_a = tail_sum1[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[0] = head_t;
+	      tail_sum1[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum1[1];
+	      tail_a = tail_sum1[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum1[1] = head_t;
+	      tail_sum1[1] = tail_t;
+	    }
+	    x_elem = tail_x_i[jx];
+	    {
+	      /* Compute complex-extra = complex-double * real. */
+	      double head_t, tail_t;
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem * split;
+		a1 = con - x_elem;
+		a1 = con - a1;
+		a2 = x_elem - a1;
+		con = a_elem[0] * split;
+		b1 = con - a_elem[0];
+		b1 = con - b1;
+		b2 = a_elem[0] - b1;
+
+		head_t = x_elem * a_elem[0];
+		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_prod[0] = head_t;
+	      tail_prod[0] = tail_t;
+	      {
+		/* Compute double_double = double * double. */
+		double a1, a2, b1, b2, con;
+
+		con = x_elem * split;
+		a1 = con - x_elem;
+		a1 = con - a1;
+		a2 = x_elem - a1;
+		con = a_elem[1] * split;
+		b1 = con - a_elem[1];
+		b1 = con - b1;
+		b2 = a_elem[1] - b1;
+
+		head_t = x_elem * a_elem[1];
+		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
+	      }
+	      head_prod[1] = head_t;
+	      tail_prod[1] = tail_t;
+	    }
+	    {
+	      double head_t, tail_t;
+	      double head_a, tail_a;
+	      double head_b, tail_b;
+	      /* Real part */
+	      head_a = head_sum2[0];
+	      tail_a = tail_sum2[0];
+	      head_b = head_prod[0];
+	      tail_b = tail_prod[0];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[0] = head_t;
+	      tail_sum2[0] = tail_t;
+	      /* Imaginary part */
+	      head_a = head_sum2[1];
+	      tail_a = tail_sum2[1];
+	      head_b = head_prod[1];
+	      tail_b = tail_prod[1];
+	      {
+		/* Compute double-double = double-double + double-double. */
+		double bv;
+		double s1, s2, t1, t2;
+
+		/* Add two hi words. */
+		s1 = head_a + head_b;
+		bv = s1 - head_a;
+		s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+		/* Add two lo words. */
+		t1 = tail_a + tail_b;
+		bv = t1 - tail_a;
+		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+		s2 += t1;
+
+		/* Renormalize (s1, s2)  to  (t1, s2) */
+		t1 = s1 + s2;
+		s2 = s2 - (t1 - s1);
+
+		t2 += s2;
+
+		/* Renormalize (t1, t2)  */
+		head_t = t1 + t2;
+		tail_t = t2 - (head_t - t1);
+	      }
+	      head_sum2[1] = head_t;
+	      tail_sum2[1] = tail_t;
+	    }
+	    aij += incaij;
+	    jx += incx;
+	  }
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum1[0];
+	  tail_a0 = tail_sum1[0];
+	  head_a1 = head_sum1[1];
+	  tail_a1 = tail_sum1[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[0] = head_t1;
+	  tail_tmp1[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp1[1] = head_t1;
+	  tail_tmp1[1] = tail_t1;
+	}
+
+	{
+	  /* Compute complex-extra = complex-extra * complex-double. */
+	  double head_a0, tail_a0;
+	  double head_a1, tail_a1;
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  head_a0 = head_sum2[0];
+	  tail_a0 = tail_sum2[0];
+	  head_a1 = head_sum2[1];
+	  tail_a1 = tail_sum2[1];
+	  /* real part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a0 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a1 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[0] = head_t1;
+	  tail_tmp2[0] = tail_t1;
+	  /* imaginary part */
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a1 * split;
+	    a11 = con - head_a1;
+	    a11 = con - a11;
+	    a21 = head_a1 - a11;
+	    con = alpha_i[0] * split;
+	    b1 = con - alpha_i[0];
+	    b1 = con - b1;
+	    b2 = alpha_i[0] - b1;
+
+	    c11 = head_a1 * alpha_i[0];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a1 * alpha_i[0];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double * double. */
+	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
+
+	    con = head_a0 * split;
+	    a11 = con - head_a0;
+	    a11 = con - a11;
+	    a21 = head_a0 - a11;
+	    con = alpha_i[1] * split;
+	    b1 = con - alpha_i[1];
+	    b1 = con - b1;
+	    b2 = alpha_i[1] - b1;
+
+	    c11 = head_a0 * alpha_i[1];
+	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
+
+	    c2 = tail_a0 * alpha_i[1];
+	    t1 = c11 + c2;
+	    t2 = (c2 - (t1 - c11)) + c21;
+
+	    head_t2 = t1 + t2;
+	    tail_t2 = t2 - (head_t2 - t1);
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp2[1] = head_t1;
+	  tail_tmp2[1] = tail_t1;
+	}
+
+	{
+	  double head_t, tail_t;
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp1[0];
+	  tail_a = tail_tmp1[0];
+	  head_b = head_tmp2[0];
+	  tail_b = tail_tmp2[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[0] = head_t;
+	  tail_tmp3[0] = tail_t;
+	  /* Imaginary part */
+	  head_a = head_tmp1[1];
+	  tail_a = tail_tmp1[1];
+	  head_b = head_tmp2[1];
+	  tail_b = tail_tmp2[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t = t1 + t2;
+	    tail_t = t2 - (head_t - t1);
+	  }
+	  head_tmp3[1] = head_t;
+	  tail_tmp3[1] = tail_t;
+	}
+	y_elem[0] = y_i[iy];
+	y_elem[1] = y_i[iy + 1];
+	{
+	  /* Compute complex-extra = complex-double * complex-double. */
+	  double head_t1, tail_t1;
+	  double head_t2, tail_t2;
+	  /* Real part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[0] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[1] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  head_t2 = -head_t2;
+	  tail_t2 = -tail_t2;
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[0] = head_t1;
+	  tail_tmp4[0] = tail_t1;
+	  /* Imaginary part */
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[1] * split;
+	    a1 = con - beta_i[1];
+	    a1 = con - a1;
+	    a2 = beta_i[1] - a1;
+	    con = y_elem[0] * split;
+	    b1 = con - y_elem[0];
+	    b1 = con - b1;
+	    b2 = y_elem[0] - b1;
+
+	    head_t1 = beta_i[1] * y_elem[0];
+	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double_double = double * double. */
+	    double a1, a2, b1, b2, con;
+
+	    con = beta_i[0] * split;
+	    a1 = con - beta_i[0];
+	    a1 = con - a1;
+	    a2 = beta_i[0] - a1;
+	    con = y_elem[1] * split;
+	    b1 = con - y_elem[1];
+	    b1 = con - b1;
+	    b2 = y_elem[1] - b1;
+
+	    head_t2 = beta_i[0] * y_elem[1];
+	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
+	  }
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_t1 + head_t2;
+	    bv = s1 - head_t1;
+	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_t1 + tail_t2;
+	    bv = t1 - tail_t1;
+	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    head_t1 = t1 + t2;
+	    tail_t1 = t2 - (head_t1 - t1);
+	  }
+	  head_tmp4[1] = head_t1;
+	  tail_tmp4[1] = tail_t1;
+	}
+	{
+	  double head_a, tail_a;
+	  double head_b, tail_b;
+	  /* Real part */
+	  head_a = head_tmp4[0];
+	  tail_a = tail_tmp4[0];
+	  head_b = head_tmp3[0];
+	  tail_b = tail_tmp3[0];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[0] = t1 + t2;
+	  }
+	  /* Imaginary part */
+	  head_a = head_tmp4[1];
+	  tail_a = tail_tmp4[1];
+	  head_b = head_tmp3[1];
+	  tail_b = tail_tmp3[1];
+	  {
+	    /* Compute double-double = double-double + double-double. */
+	    double bv;
+	    double s1, s2, t1, t2;
+
+	    /* Add two hi words. */
+	    s1 = head_a + head_b;
+	    bv = s1 - head_a;
+	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));
+
+	    /* Add two lo words. */
+	    t1 = tail_a + tail_b;
+	    bv = t1 - tail_a;
+	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));
+
+	    s2 += t1;
+
+	    /* Renormalize (s1, s2)  to  (t1, s2) */
+	    t1 = s1 + s2;
+	    s2 = s2 - (t1 - s1);
+
+	    t2 += s2;
+
+	    /* Renormalize (t1, t2)  */
+	    result[1] = t1 + t2;
+	  }
+	}
+	y_i[iy] = result[0];
+	y_i[iy + 1] = result[1];
+
+	iy += incy;
+	if (i >= lbound) {
+	  ix0 += incx;
+	  ai += incai2;
+	  la++;
+	} else {
+	  ai += incai1;
+	}
+	if (i < rbound) {
+	  ra++;
+	}
+      }
+
+      FPU_FIX_STOP;
+    }
+    break;
+  }
+}				/* end BLAS_zgbmv2_z_d_x */
diff --git a/XBLAS/src/gbmv2/Makefile b/XBLAS/src/gbmv2/Makefile
new file mode 100644
index 00000000..3138b90f
--- /dev/null
+++ b/XBLAS/src/gbmv2/Makefile
@@ -0,0 +1,80 @@
+include ../../make.conf
+include ../../$(MAKEINC)
+
+LIB_PATH = ../../$(OUTPUT_DIR)
+SRC_PATH = .
+HEADER_PATH = ..
+
+GBMV2_SRCS =\
+	$(SRC_PATH)/BLAS_dgbmv2_d_s.c\
+	$(SRC_PATH)/BLAS_dgbmv2_s_d.c\
+	$(SRC_PATH)/BLAS_dgbmv2_s_s.c\
+	$(SRC_PATH)/BLAS_zgbmv2_z_c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_c_z.c\
+	$(SRC_PATH)/BLAS_zgbmv2_c_c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_c_s.c\
+	$(SRC_PATH)/BLAS_cgbmv2_s_c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_s_s.c\
+	$(SRC_PATH)/BLAS_zgbmv2_z_d.c\
+	$(SRC_PATH)/BLAS_zgbmv2_d_z.c\
+	$(SRC_PATH)/BLAS_zgbmv2_d_d.c\
+	$(SRC_PATH)/BLAS_sgbmv2_x.c\
+	$(SRC_PATH)/BLAS_dgbmv2_x.c\
+	$(SRC_PATH)/BLAS_cgbmv2_x.c\
+	$(SRC_PATH)/BLAS_zgbmv2_x.c\
+	$(SRC_PATH)/BLAS_dgbmv2_d_s_x.c\
+	$(SRC_PATH)/BLAS_dgbmv2_s_d_x.c\
+	$(SRC_PATH)/BLAS_dgbmv2_s_s_x.c\
+	$(SRC_PATH)/BLAS_zgbmv2_z_c_x.c\
+	$(SRC_PATH)/BLAS_zgbmv2_c_z_x.c\
+	$(SRC_PATH)/BLAS_zgbmv2_c_c_x.c\
+	$(SRC_PATH)/BLAS_cgbmv2_c_s_x.c\
+	$(SRC_PATH)/BLAS_cgbmv2_s_c_x.c\
+	$(SRC_PATH)/BLAS_cgbmv2_s_s_x.c\
+	$(SRC_PATH)/BLAS_zgbmv2_z_d_x.c\
+	$(SRC_PATH)/BLAS_zgbmv2_d_z_x.c\
+	$(SRC_PATH)/BLAS_zgbmv2_d_d_x.c \
+	\
+	$(SRC_PATH)/BLAS_dgbmv2_d_s-f2c.c\
+	$(SRC_PATH)/BLAS_dgbmv2_s_d-f2c.c\
+	$(SRC_PATH)/BLAS_dgbmv2_s_s-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_z_c-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_c_z-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_c_c-f2c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_c_s-f2c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_s_c-f2c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_s_s-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_z_d-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_d_z-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_d_d-f2c.c\
+	$(SRC_PATH)/BLAS_sgbmv2_x-f2c.c\
+	$(SRC_PATH)/BLAS_dgbmv2_x-f2c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_x-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_x-f2c.c\
+	$(SRC_PATH)/BLAS_dgbmv2_d_s_x-f2c.c\
+	$(SRC_PATH)/BLAS_dgbmv2_s_d_x-f2c.c\
+	$(SRC_PATH)/BLAS_dgbmv2_s_s_x-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_z_c_x-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_c_z_x-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_c_c_x-f2c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_c_s_x-f2c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_s_c_x-f2c.c\
+	$(SRC_PATH)/BLAS_cgbmv2_s_s_x-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_z_d_x-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_d_z_x-f2c.c\
+	$(SRC_PATH)/BLAS_zgbmv2_d_d_x-f2c.c 
+
+GBMV2_OBJS = $(GBMV2_SRCS:.c=.o)
+
+all: $(GBMV2_OBJS)
+
+lib: $(GBMV2_OBJS)
+	$(ARCH) $(ARCHFLAGS) $(LIB_PATH)/$(XBLASLIB) $(GBMV2_OBJS)
+	$(RANLIB) $(LIB_PATH)/$(XBLASLIB)
+
+.c.o: 
+	$(CC) $(CFLAGS) -I$(HEADER_PATH) -c -o $@ $<
+
+clean:
+	rm -f *.o *~ *.BAK
+
diff --git a/XBLAS/testing/blas_extended_test.h b/XBLAS/testing/blas_extended_test.h
index cab9b7d5..7f75b9bf 100644
--- a/XBLAS/testing/blas_extended_test.h
+++ b/XBLAS/testing/blas_extended_test.h
@@ -1071,13 +1071,110 @@ void BLAS_chemv2_c_s_testgen(int norm, enum blas_order_type order,
 			     void *y, int *seed, double *head_r_true,
 			     double *tail_r_true);
 
-void BLAS_sge_sum_mv_testgen(int norm, enum blas_order_type order,
-			     int m, int n, int randomize,
-			     float *alpha, int alpha_flag, float *beta,
-			     int beta_flag, float *a, int lda, float *b,
-			     int ldb, float *x, int incx,
-			     float *alpha_use_ptr, float *a_use, float *b_use,
-			     int *seed, double *head_r_true,
+
+void BLAS_sgbmv2_testgen(int norm, enum blas_order_type order,
+			 enum blas_trans_type trans, int m, int n, int kl,
+			 int ku, float *alpha, int alpha_flag, float *AB,
+			 int lda, float *x_head, float *x_tail, float *beta,
+			 int beta_flag, float *y, int *seed, double *r_true_l,
+			 double *r_true_t);
+void BLAS_dgbmv2_testgen(int norm, enum blas_order_type order,
+			 enum blas_trans_type trans, int m, int n, int kl,
+			 int ku, double *alpha, int alpha_flag, double *AB,
+			 int lda, double *x_head, double *x_tail,
+			 double *beta, int beta_flag, double *y, int *seed,
+			 double *r_true_l, double *r_true_t);
+void BLAS_cgbmv2_testgen(int norm, enum blas_order_type order,
+			 enum blas_trans_type trans, int m, int n, int kl,
+			 int ku, void *alpha, int alpha_flag, void *AB,
+			 int lda, void *x_head, void *x_tail, void *beta,
+			 int beta_flag, void *y, int *seed, double *r_true_l,
+			 double *r_true_t);
+void BLAS_zgbmv2_testgen(int norm, enum blas_order_type order,
+			 enum blas_trans_type trans, int m, int n, int kl,
+			 int ku, void *alpha, int alpha_flag, void *AB,
+			 int lda, void *x_head, void *x_tail, void *beta,
+			 int beta_flag, void *y, int *seed, double *r_true_l,
+			 double *r_true_t);
+void BLAS_cgbmv2_s_s_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, float *AB,
+			     int lda, float *x_head, float *x_tail,
+			     void *beta, int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_cgbmv2_s_c_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, float *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_cgbmv2_c_s_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, float *x_head, float *x_tail,
+			     void *beta, int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_zgbmv2_d_d_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, double *AB,
+			     int lda, double *x_head, double *x_tail,
+			     void *beta, int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_zgbmv2_d_z_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, double *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_zgbmv2_z_d_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, double *x_head, double *x_tail,
+			     void *beta, int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_dgbmv2_s_s_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, double *alpha, int alpha_flag, float *AB,
+			     int lda, float *x_head, float *x_tail,
+			     double *beta, int beta_flag, double *y,
+			     int *seed, double *r_true_l, double *r_true_t);
+void BLAS_dgbmv2_s_d_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, double *alpha, int alpha_flag, float *AB,
+			     int lda, double *x_head, double *x_tail,
+			     double *beta, int beta_flag, double *y,
+			     int *seed, double *r_true_l, double *r_true_t);
+void BLAS_dgbmv2_d_s_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, double *alpha, int alpha_flag,
+			     double *AB, int lda, float *x_head,
+			     float *x_tail, double *beta, int beta_flag,
+			     double *y, int *seed, double *r_true_l,
+			     double *r_true_t);
+void BLAS_zgbmv2_c_c_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_zgbmv2_c_z_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_zgbmv2_z_c_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t);
+void BLAS_sge_sum_mv_testgen(int norm, enum blas_order_type order, int m,
+			     int n, int randomize, float *alpha,
+			     int alpha_flag, float *beta, int beta_flag,
+			     float *a, int lda, float *b, int ldb, float *x,
+			     int incx, float *alpha_use_ptr, float *a_use,
+			     float *b_use, int *seed, double *head_r_true,
 			     double *tail_r_true);
 void BLAS_dge_sum_mv_testgen(int norm, enum blas_order_type order, int m,
 			     int n, int randomize, double *alpha,
diff --git a/XBLAS/testing/test-gbmv2/BLAS_gbmv2_testgen.c b/XBLAS/testing/test-gbmv2/BLAS_gbmv2_testgen.c
new file mode 100644
index 00000000..99564ff1
--- /dev/null
+++ b/XBLAS/testing/test-gbmv2/BLAS_gbmv2_testgen.c
@@ -0,0 +1,2315 @@
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+#include "blas_extended_test.h"
+
+
+void BLAS_sgbmv2_testgen(int norm, enum blas_order_type order,
+			 enum blas_trans_type trans, int m, int n, int kl,
+			 int ku, float *alpha, int alpha_flag, float *AB,
+			 int lda, float *x_head, float *x_tail, float *beta,
+			 int beta_flag, float *y, int *seed, double *r_true_l,
+			 double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) float*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) float*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) float*
+ * x_tail       (input/output) float*
+ *
+ * beta         (input/output) float*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) float*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = x_head;
+  float *x_tail_i = x_tail;
+  float *y_i = y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  float y_elem;
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+
+
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_sdot2_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+		       alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+		       seed, &y_elem, &r_true_l[i * incy],
+		       &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem;
+
+
+    /* copy a_vec to AB */
+    sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_dgbmv2_testgen(int norm, enum blas_order_type order,
+			 enum blas_trans_type trans, int m, int n, int kl,
+			 int ku, double *alpha, int alpha_flag, double *AB,
+			 int lda, double *x_head, double *x_tail,
+			 double *beta, int beta_flag, double *y, int *seed,
+			 double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) double*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) double*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) double*
+ * x_tail       (input/output) double*
+ *
+ * beta         (input/output) double*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) double*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  double *x_head_i = x_head;
+  double *x_tail_i = x_tail;
+  double *y_i = y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  double *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem;
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+
+
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    dgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_ddot2_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+		       alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+		       seed, &y_elem, &r_true_l[i * incy],
+		       &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem;
+
+
+    /* copy a_vec to AB */
+    dgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_cgbmv2_testgen(int norm, enum blas_order_type order,
+			 enum blas_trans_type trans, int m, int n, int kl,
+			 int ku, void *alpha, int alpha_flag, void *AB,
+			 int lda, void *x_head, void *x_tail, void *beta,
+			 int beta_flag, void *y, int *seed, double *r_true_l,
+			 double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) void*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) void*
+ * x_tail       (input/output) void*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = (float *) x_head;
+  float *x_tail_i = (float *) x_tail;
+  float *y_i = (float *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+  int j;
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  float y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+  incAB *= 2;
+  incx *= 2;
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    cgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_cdot2_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+		       alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+		       seed, y_elem, &r_true_l[i * incy],
+		       &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+    if (trans == blas_conj_trans) {
+      for (j = 0; j < n_i * incAB; j += 2) {
+	a_vec[j + 1] = -a_vec[j + 1];
+      }
+    }
+    /* copy a_vec to AB */
+    cgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_head_i[i * incx + 1] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+    x_tail_i[i * incx + 1] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_zgbmv2_testgen(int norm, enum blas_order_type order,
+			 enum blas_trans_type trans, int m, int n, int kl,
+			 int ku, void *alpha, int alpha_flag, void *AB,
+			 int lda, void *x_head, void *x_tail, void *beta,
+			 int beta_flag, void *y, int *seed, double *r_true_l,
+			 double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) void*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) void*
+ * x_tail       (input/output) void*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  double *x_head_i = (double *) x_head;
+  double *x_tail_i = (double *) x_tail;
+  double *y_i = (double *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+  int j;
+  double *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+  incAB *= 2;
+  incx *= 2;
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    zgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_zdot2_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+		       alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+		       seed, y_elem, &r_true_l[i * incy],
+		       &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+    if (trans == blas_conj_trans) {
+      for (j = 0; j < n_i * incAB; j += 2) {
+	a_vec[j + 1] = -a_vec[j + 1];
+      }
+    }
+    /* copy a_vec to AB */
+    zgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_head_i[i * incx + 1] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+    x_tail_i[i * incx + 1] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_cgbmv2_s_s_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, float *AB,
+			     int lda, float *x_head, float *x_tail,
+			     void *beta, int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) float*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) float*
+ * x_tail       (input/output) float*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = x_head;
+  float *x_tail_i = x_tail;
+  float *y_i = (float *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  float y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_cdot2_s_s_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+
+    /* copy a_vec to AB */
+    sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_cgbmv2_s_c_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, float *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) float*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) void*
+ * x_tail       (input/output) void*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = (float *) x_head;
+  float *x_tail_i = (float *) x_tail;
+  float *y_i = (float *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  float y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+
+  incx *= 2;
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_cdot2_c_s_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+
+    /* copy a_vec to AB */
+    sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_head_i[i * incx + 1] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+    x_tail_i[i * incx + 1] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_cgbmv2_c_s_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, float *x_head, float *x_tail,
+			     void *beta, int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) void*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) float*
+ * x_tail       (input/output) float*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = x_head;
+  float *x_tail_i = x_tail;
+  float *y_i = (float *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+  int j;
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  float y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+  incAB *= 2;
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    cgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_cdot2_s_c_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+    if (trans == blas_conj_trans) {
+      for (j = 0; j < n_i * incAB; j += 2) {
+	a_vec[j + 1] = -a_vec[j + 1];
+      }
+    }
+    /* copy a_vec to AB */
+    cgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_zgbmv2_d_d_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, double *AB,
+			     int lda, double *x_head, double *x_tail,
+			     void *beta, int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) double*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) double*
+ * x_tail       (input/output) double*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  double *x_head_i = x_head;
+  double *x_tail_i = x_tail;
+  double *y_i = (double *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  double *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    dgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_zdot2_d_d_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+
+    /* copy a_vec to AB */
+    dgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_zgbmv2_d_z_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, double *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) double*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) void*
+ * x_tail       (input/output) void*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  double *x_head_i = (double *) x_head;
+  double *x_tail_i = (double *) x_tail;
+  double *y_i = (double *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  double *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+
+  incx *= 2;
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    dgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_zdot2_z_d_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+
+    /* copy a_vec to AB */
+    dgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_head_i[i * incx + 1] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+    x_tail_i[i * incx + 1] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_zgbmv2_z_d_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, double *x_head, double *x_tail,
+			     void *beta, int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) void*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) double*
+ * x_tail       (input/output) double*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  double *x_head_i = x_head;
+  double *x_tail_i = x_tail;
+  double *y_i = (double *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+  int j;
+  double *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+  incAB *= 2;
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    zgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_zdot2_d_z_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+    if (trans == blas_conj_trans) {
+      for (j = 0; j < n_i * incAB; j += 2) {
+	a_vec[j + 1] = -a_vec[j + 1];
+      }
+    }
+    /* copy a_vec to AB */
+    zgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_dgbmv2_s_s_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, double *alpha, int alpha_flag, float *AB,
+			     int lda, float *x_head, float *x_tail,
+			     double *beta, int beta_flag, double *y,
+			     int *seed, double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) double*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) float*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) float*
+ * x_tail       (input/output) float*
+ *
+ * beta         (input/output) double*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) double*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = x_head;
+  float *x_tail_i = x_tail;
+  double *y_i = y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem;
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+
+
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_ddot2_s_s_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, &y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem;
+
+
+    /* copy a_vec to AB */
+    sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_dgbmv2_s_d_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, double *alpha, int alpha_flag, float *AB,
+			     int lda, double *x_head, double *x_tail,
+			     double *beta, int beta_flag, double *y,
+			     int *seed, double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) double*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) float*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) double*
+ * x_tail       (input/output) double*
+ *
+ * beta         (input/output) double*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) double*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  double *x_head_i = x_head;
+  double *x_tail_i = x_tail;
+  double *y_i = y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem;
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+
+
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_ddot2_d_s_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, &y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem;
+
+
+    /* copy a_vec to AB */
+    sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_dgbmv2_d_s_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, double *alpha, int alpha_flag,
+			     double *AB, int lda, float *x_head,
+			     float *x_tail, double *beta, int beta_flag,
+			     double *y, int *seed, double *r_true_l,
+			     double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) double*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) double*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) float*
+ * x_tail       (input/output) float*
+ *
+ * beta         (input/output) double*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) double*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = x_head;
+  float *x_tail_i = x_tail;
+  double *y_i = y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+
+  double *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem;
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+
+
+
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    dgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_ddot2_s_d_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, &y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem;
+
+
+    /* copy a_vec to AB */
+    dgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_zgbmv2_c_c_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) void*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) void*
+ * x_tail       (input/output) void*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = (float *) x_head;
+  float *x_tail_i = (float *) x_tail;
+  double *y_i = (double *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+  int j;
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+  incAB *= 2;
+  incx *= 2;
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    cgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_zdot2_c_c_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+    if (trans == blas_conj_trans) {
+      for (j = 0; j < n_i * incAB; j += 2) {
+	a_vec[j + 1] = -a_vec[j + 1];
+      }
+    }
+    /* copy a_vec to AB */
+    cgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_head_i[i * incx + 1] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+    x_tail_i[i * incx + 1] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_zgbmv2_c_z_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) void*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) void*
+ * x_tail       (input/output) void*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  double *x_head_i = (double *) x_head;
+  double *x_tail_i = (double *) x_tail;
+  double *y_i = (double *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+  int j;
+  float *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+  incAB *= 2;
+  incx *= 2;
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    cgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_zdot2_z_c_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+    if (trans == blas_conj_trans) {
+      for (j = 0; j < n_i * incAB; j += 2) {
+	a_vec[j + 1] = -a_vec[j + 1];
+      }
+    }
+    /* copy a_vec to AB */
+    cgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_head_i[i * incx + 1] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+    x_tail_i[i * incx + 1] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
+void BLAS_zgbmv2_z_c_testgen(int norm, enum blas_order_type order,
+			     enum blas_trans_type trans, int m, int n, int kl,
+			     int ku, void *alpha, int alpha_flag, void *AB,
+			     int lda, void *x_head, void *x_tail, void *beta,
+			     int beta_flag, void *y, int *seed,
+			     double *r_true_l, double *r_true_t)
+
+/*
+ * Purpose
+ * =======
+ *
+ * Generates alpha, AB, x, beta, and y, where AB is a banded
+ * matrix; and computes r_true.
+ *
+ * Arguments
+ * =========
+ *
+ * norm         (input) blas_norm_type 
+ *
+ * order        (input) blas_order_type
+ *              Order of AB; row or column major
+ *
+ * trans         (input) blas_trans_type
+ *              Whether AB is no trans, trans, or conj trans
+ *
+ * m            (input) int
+ *              The number of rows 
+ *
+ * n            (input) int
+ *              The number of columns
+ * 
+ * kl           (input) int
+ *              The number of subdiagonals
+ *
+ * ku           (input) int
+ *              The number of superdiagonals
+ *
+ * alpha        (input/output) void*
+ *              If alpha_flag = 1, alpha is input.
+ *              If alpha_flag = 0, alpha is output.
+ *
+ * alpha_flag   (input) int
+ *              = 0 : alpha is free, and is output.
+ *              = 1 : alpha is fixed on input.
+ *              
+ * AB           (output) void*
+ *              Matrix A in the banded storage.
+ *
+ *
+ * lda          (input) int
+ *              The first dimension of AB
+ *
+ * x_head       (input/output) void*
+ * x_tail       (input/output) void*
+ *
+ * beta         (input/output) void*
+ *              If beta_flag = 1, beta is input.
+ *              If beta_flag = 0, beta is output.
+ *
+ * beta_flag    (input) int
+ *              = 0 : beta is free, and is output.
+ *              = 1 : beta is fixed on input.
+ *
+ * y            (input/output) void*
+ *
+ * seed         (input/output) int
+ *
+ * r_true_l     (output) double*
+ *              The leading part of the truth in double-double.
+ *
+ * r_true_t     (output) double*
+ *              The trailing part of the truth in double-double.
+ *
+ */
+{
+  float *x_head_i = (float *) x_head;
+  float *x_tail_i = (float *) x_tail;
+  double *y_i = (double *) y;
+  int n_fix2;
+  int n_mix;
+  int ysize;
+  int i;
+  int j;
+  double *a_vec;
+  int m_i, n_i;
+  int max_mn;
+  int incy, incAB, incx;
+  double y_elem[2];
+
+  max_mn = MAX(m, n);
+  incx = incy = incAB = 1;
+  incy *= 2;
+  incAB *= 2;
+  incx *= 2;
+
+  if (trans == blas_no_trans) {
+    m_i = m;
+    n_i = n;
+  } else {
+    m_i = n;
+    n_i = m;
+  }
+
+  a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && a_vec == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* calling dot_testgen n time. in each iteration, one row of AB, and one 
+     element of y are produced. the vector x is produced at the first 
+     iteration only */
+  for (i = 0; i < m_i; i++) {
+    /* copy AB to a_vec */
+    zgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
+		  &n_fix2, &n_mix, &ysize);
+
+    if (i == 1) {
+      /* from now on, fix alpha and beta */
+      alpha_flag = 1;
+      beta_flag = 1;
+    }
+
+    BLAS_zdot2_c_z_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
+			   alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
+			   seed, y_elem, &r_true_l[i * incy],
+			   &r_true_t[i * incy]);
+    y_i[i * incy] = y_elem[0];
+    y_i[i * incy + 1] = y_elem[1];
+
+    if (trans == blas_conj_trans) {
+      for (j = 0; j < n_i * incAB; j += 2) {
+	a_vec[j + 1] = -a_vec[j + 1];
+      }
+    }
+    /* copy a_vec to AB */
+    zgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
+  }
+
+  /* Zero out trailing part of x */
+  for (i = ysize; i < n_i; i++) {
+    x_head_i[i * incx] = 0.0;
+    x_head_i[i * incx + 1] = 0.0;
+    x_tail_i[i * incx] = 0.0;
+    x_tail_i[i * incx + 1] = 0.0;
+  }
+
+  blas_free(a_vec);
+}
diff --git a/XBLAS/testing/test-gbmv2/Makefile b/XBLAS/testing/test-gbmv2/Makefile
new file mode 100644
index 00000000..533f0f49
--- /dev/null
+++ b/XBLAS/testing/test-gbmv2/Makefile
@@ -0,0 +1,43 @@
+include ../../make.conf
+include ../../$(MAKEINC)
+
+TEST_DOT_PATH = ../test-dot
+TEST_DOT2_PATH = ../test-dot2
+TEST_GBMV_PATH=../test-gbmv
+LIB_PATH = ../../$(OUTPUT_DIR)
+HEADER_PATH = -I../../src -I..
+
+TEST_DOT_OBJS = $(TEST_DOT_PATH)/testgen_aux.o \
+		$(TEST_DOT_PATH)/print_vector.o \
+		$(TEST_DOT_PATH)/copy_vector.o
+TEST_DOT2_OBJS = $(TEST_DOT2_PATH)/dot2.o \
+		 $(TEST_DOT2_PATH)/testgen_BLAS_sdot2.o \
+		 $(TEST_DOT2_PATH)/testgen_BLAS_ddot2.o \
+		 $(TEST_DOT2_PATH)/testgen_BLAS_cdot2.o \
+		 $(TEST_DOT2_PATH)/testgen_BLAS_zdot2.o \
+		 $(TEST_DOT2_PATH)/test_dot2.o \
+		 $(TEST_DOT2_PATH)/r_truth2.o \
+		 $(TEST_DOT2_PATH)/BLAS_dot2_testgen.o
+TEST_GBMV_OBJS=../test-gbmv/gbmv-support.o
+
+TEST_SRCS = do_test_gbmv2.c BLAS_gbmv2_testgen.c
+TEST_OBJS = $(TEST_SRCS:.c=.o)
+COMMON_OBJS = ../common/dummy_main.o
+
+ALL_OBJS = $(TEST_OBJS) $(TEST_DOT_OBJS) $(TEST_DOT2_OBJS) \
+           $(TEST_GBMV_OBJS) $(COMMON_OBJS)
+
+test: do_test_gbmv2
+	@echo Testing GBMV2
+	./do_test_gbmv2 12 1 1.0 0 0.01 > gbmv2.results
+
+do_test_gbmv2: $(ALL_OBJS) $(LIB_PATH)/$(XBLASLIB)
+	$(LINKER) $(LDFLAGS) $(ALL_OBJS) $(LIB_PATH)/$(XBLASLIB) \
+	-o do_test_gbmv2 $(EXTRA_LIBS)
+
+.c.o:
+	$(CC) $(CFLAGS) $(HEADER_PATH) -c -o $@ $<
+
+clean:
+	rm -f *.o *~ *.BAK gbmv2.results do_test_gbmv2 core
+
diff --git a/XBLAS/testing/test-gbmv2/do_test_gbmv2.c b/XBLAS/testing/test-gbmv2/do_test_gbmv2.c
new file mode 100644
index 00000000..bc76502d
--- /dev/null
+++ b/XBLAS/testing/test-gbmv2/do_test_gbmv2.c
@@ -0,0 +1,18419 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "blas_extended.h"
+#include "blas_extended_private.h"
+#include "blas_extended_test.h"
+
+
+double do_test_dgbmv2_d_s(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_dgbmv2_d_s";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha;
+  double beta;
+  double *AB;
+  float *x_head;
+  float *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_dgbmv2_d_s_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_dgbmv2_d_s_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha = 1.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta = 1.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_dgbmv2_d_s_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+
+
+			scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+
+
+			  dcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_dgbmv2_d_s */
+			  FPU_FIX_STOP;
+			  BLAS_dgbmv2_d_s(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    dgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_ddot2_d_s(n_i, blas_no_conj, alpha,
+						beta, y_gen[j * incy_gen],
+						y[iy],
+						head_r_true[j * incy_gen],
+						tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				dgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				dprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%16.8e", x_head[ix]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%16.8e", x_tail[ix]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("%24.16e", y_gen[k * incy_gen]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("%24.16e", y[iy]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("%24.16e", alpha);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("%24.16e", beta);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf("[%24.16e, %24.16e]",
+				       head_r_true[j * incy_gen],
+				       tail_r_true[j * incy_gen]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_dgbmv2_s_d(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_dgbmv2_s_d";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha;
+  double beta;
+  float *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_dgbmv2_s_d_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_dgbmv2_s_d_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha = 1.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta = 1.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_dgbmv2_s_d_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+
+
+			dcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			dcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+
+
+			  dcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_dgbmv2_s_d */
+			  FPU_FIX_STOP;
+			  BLAS_dgbmv2_s_d(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_ddot2_s_d(n_i, blas_no_conj, alpha,
+						beta, y_gen[j * incy_gen],
+						y[iy],
+						head_r_true[j * incy_gen],
+						tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				sgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				sprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%24.16e", x_head[ix]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%24.16e", x_tail[ix]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("%24.16e", y_gen[k * incy_gen]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("%24.16e", y[iy]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("%24.16e", alpha);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("%24.16e", beta);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf("[%24.16e, %24.16e]",
+				       head_r_true[j * incy_gen],
+				       tail_r_true[j * incy_gen]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_dgbmv2_s_s(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_dgbmv2_s_s";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha;
+  double beta;
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  double *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_dgbmv2_s_s_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_dgbmv2_s_s_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha = 1.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta = 1.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_dgbmv2_s_s_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+
+
+			scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+
+
+			  dcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_dgbmv2_s_s */
+			  FPU_FIX_STOP;
+			  BLAS_dgbmv2_s_s(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_ddot2_s_s(n_i, blas_no_conj, alpha,
+						beta, y_gen[j * incy_gen],
+						y[iy],
+						head_r_true[j * incy_gen],
+						tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				sgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				sprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%16.8e", x_head[ix]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%16.8e", x_tail[ix]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("%24.16e", y_gen[k * incy_gen]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("%24.16e", y[iy]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("%24.16e", alpha);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("%24.16e", beta);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf("[%24.16e, %24.16e]",
+				       head_r_true[j * incy_gen],
+				       tail_r_true[j * incy_gen]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_z_c(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_z_c";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  float *x_head;
+  float *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_z_c_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_z_c_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
+			   2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_zgbmv2_z_c_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+			incx *= 2;
+
+			ccopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			ccopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_zgbmv2_z_c */
+			  FPU_FIX_STOP;
+			  BLAS_zgbmv2_z_c(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    zgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_zdot2_z_c(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				zgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				zprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)", x_head[ix],
+					 x_head[ix + 1]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)", x_tail[ix],
+					 x_tail[ix + 1]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%24.16e, %24.16e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%24.16e, %24.16e)", alpha[0],
+				     alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%24.16e, %24.16e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_c_z(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_c_z";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  float *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_c_z_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_c_z_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
+			  2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_zgbmv2_c_z_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+			incx *= 2;
+
+			zcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			zcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_zgbmv2_c_z */
+			  FPU_FIX_STOP;
+			  BLAS_zgbmv2_c_z(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    cgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_zdot2_c_z(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				cgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				cprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)", x_head[ix],
+					 x_head[ix + 1]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)", x_tail[ix],
+					 x_tail[ix + 1]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%24.16e, %24.16e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%24.16e, %24.16e)", alpha[0],
+				     alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%24.16e, %24.16e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_c_c(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_c_c";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  double *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_c_c_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_c_c_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
+			  2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_zgbmv2_c_c_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+			incx *= 2;
+
+			ccopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			ccopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_zgbmv2_c_c */
+			  FPU_FIX_STOP;
+			  BLAS_zgbmv2_c_c(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    cgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_zdot2_c_c(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				cgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				cprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)", x_head[ix],
+					 x_head[ix + 1]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)", x_tail[ix],
+					 x_tail[ix + 1]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%24.16e, %24.16e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%24.16e, %24.16e)", alpha[0],
+				     alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%24.16e, %24.16e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_cgbmv2_c_s(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_cgbmv2_c_s";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  float alpha[2];
+  float beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  float *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  float *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_cgbmv2_c_s_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_cgbmv2_c_s_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
+			  2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_S);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
+	prec = blas_prec_single;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_cgbmv2_c_s_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+
+
+			scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  ccopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_cgbmv2_c_s */
+			  FPU_FIX_STOP;
+			  BLAS_cgbmv2_c_s(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    cgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_cdot2_c_s(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				cgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				cprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%16.8e", x_head[ix]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%16.8e", x_tail[ix]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%16.8e, %16.8e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%16.8e, %16.8e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_cgbmv2_s_c(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_cgbmv2_s_c";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  float alpha[2];
+  float beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  float *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  float *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_cgbmv2_s_c_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_cgbmv2_s_c_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_S);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
+	prec = blas_prec_single;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_cgbmv2_s_c_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+			incx *= 2;
+
+			ccopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			ccopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  ccopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_cgbmv2_s_c */
+			  FPU_FIX_STOP;
+			  BLAS_cgbmv2_s_c(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_cdot2_s_c(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				sgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				sprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)", x_head[ix],
+					 x_head[ix + 1]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)", x_tail[ix],
+					 x_tail[ix + 1]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%16.8e, %16.8e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%16.8e, %16.8e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_cgbmv2_s_s(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_cgbmv2_s_s";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  float alpha[2];
+  float beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  float *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  float *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_cgbmv2_s_s_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_cgbmv2_s_s_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_S);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
+	prec = blas_prec_single;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_cgbmv2_s_s_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+
+
+			scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  ccopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_cgbmv2_s_s */
+			  FPU_FIX_STOP;
+			  BLAS_cgbmv2_s_s(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_cdot2_s_s(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				sgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				sprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%16.8e", x_head[ix]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%16.8e", x_tail[ix]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%16.8e, %16.8e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%16.8e, %16.8e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%16.8e, %16.8e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_z_d(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_z_d";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_z_d_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_z_d_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
+			   2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_zgbmv2_z_d_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+
+
+			dcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			dcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_zgbmv2_z_d */
+			  FPU_FIX_STOP;
+			  BLAS_zgbmv2_z_d(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    zgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_zdot2_z_d(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				zgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				zprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%24.16e", x_head[ix]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%24.16e", x_tail[ix]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%24.16e, %24.16e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%24.16e, %24.16e)", alpha[0],
+				     alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%24.16e, %24.16e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_d_z(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_d_z";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_d_z_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_d_z_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_zgbmv2_d_z_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+			incx *= 2;
+
+			zcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			zcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_zgbmv2_d_z */
+			  FPU_FIX_STOP;
+			  BLAS_zgbmv2_d_z(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    dgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_zdot2_d_z(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				dgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				dprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)", x_head[ix],
+					 x_head[ix + 1]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)", x_tail[ix],
+					 x_tail[ix + 1]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%24.16e, %24.16e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%24.16e, %24.16e)", alpha[0],
+				     alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%24.16e, %24.16e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_d_d(int m, int n, int ntests, int *seed, double thresh,
+			  int debug, float test_prob, double *min_ratio,
+			  int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_d_d";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_d_d_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_d_d_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	eps_int = power(2, -BITS_D);
+	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	prec = blas_prec_double;
+
+	/* values near underflow, 1, or overflow */
+	for (norm = -1; norm <= 1; norm++) {
+
+	  /* number of tests */
+	  for (i = 0; i < ntests; i++) {
+
+	    /* row or col major */
+	    for (order_val = 0; order_val < 2; order_val++) {
+	      switch (order_val) {
+	      case 0:
+		order_type = blas_rowmajor;
+		break;
+	      case 1:
+		order_type = blas_colmajor;
+		break;
+	      }
+
+	      /* no_trans, trans, or conj_trans */
+	      for (trans_val = 0; trans_val < 3; trans_val++) {
+		switch (trans_val) {
+		case 0:
+		  trans_type = blas_no_trans;
+		  m_i = m;
+		  n_i = n;
+		  break;
+		case 1:
+		  trans_type = blas_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		case 2:
+		  trans_type = blas_conj_trans;
+		  m_i = n;
+		  n_i = m;
+		  break;
+		}
+
+		/* ku from 0 to n-1 */
+		for (ku = 0; ku < n; ku++) {
+		  if (ku == n && ku != 0)
+		    continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		  /* kl from 0 to m-1 */
+		  for (kl = 0; kl < m; kl++) {
+		    if (kl == n && kl != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for kl=0 */
+
+		    /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		    for (lda_val = 0; lda_val < 3; lda_val++) {
+		      switch (lda_val) {
+		      case 0:
+			lda = ku + kl + 1;
+			break;
+		      case 1:
+			lda = ku + kl + 2;
+			break;
+		      case 2:
+			lda = 2 * (ku + kl + 1);
+			break;
+		      }
+
+		      if ((order_type == blas_rowmajor && lda < n) ||
+			  (order_type == blas_colmajor && lda < m))
+			continue;
+
+		      /* For the sake of speed, we throw out this case at random */
+		      if (xrand(seed) >= test_prob)
+			continue;
+
+		      /* in the trivial cases, no need to run testgen */
+		      if (m > 0 && n > 0)
+			BLAS_zgbmv2_d_d_testgen(norm, order_type, trans_type,
+						m, n, kl, ku, &alpha,
+						alpha_flag, AB, lda,
+						x_head_gen, x_tail_gen, &beta,
+						beta_flag, y_gen, seed,
+						head_r_true, tail_r_true);
+		      count++;
+
+		      /* varying incx */
+		      for (incx_val = -2; incx_val <= 2; incx_val++) {
+			if (incx_val == 0)
+			  continue;
+
+			/* setting incx */
+			incx = incx_val;
+
+
+			dcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			dcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			/* varying incy */
+			for (incy_val = -2; incy_val <= 2; incy_val++) {
+			  if (incy_val == 0)
+			    continue;
+
+			  /* setting incy */
+			  incy = incy_val;
+			  incy *= 2;
+
+			  zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			  /* call BLAS_zgbmv2_d_d */
+			  FPU_FIX_STOP;
+			  BLAS_zgbmv2_d_d(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val);
+			  FPU_FIX_START;
+
+			  /* set y starting index */
+			  iy = 0;
+			  if (incy < 0)
+			    iy = -(m_i - 1) * incy;
+
+			  /* computing the ratio */
+			  for (j = 0; j < m_i; j++) {
+			    /* copy row j of AB to temp */
+			    dgbmv_copy(order_type, trans_type, m, n, kl, ku,
+				       AB, lda, temp, j);
+
+			    test_BLAS_zdot2_d_d(n_i, blas_no_conj, alpha,
+						beta, &y_gen[j * incy_gen],
+						&y[iy],
+						&head_r_true[j * incy_gen],
+						&tail_r_true[j * incy_gen],
+						temp, 1, x_head, x_tail,
+						incx_val, eps_int, un_int,
+						&ratios[j]);
+
+			    /* take the max ratio */
+			    if (j == 0) {
+			      ratio = ratios[0];
+			      /* The !<= below causes NaN error to be detected.
+			         Note that (NaN > thresh) is always false. */
+			    } else if (!(ratios[j] <= ratio)) {
+			      ratio = ratios[j];
+			    }
+
+			    iy += incy;
+			  }
+
+			  /* Increase the number of bad ratio, if the ratio
+			     is bigger than the threshold.
+			     The !<= below causes NaN error to be detected.
+			     Note that (NaN > thresh) is always false. */
+			  if (!(ratio <= thresh)) {
+			    bad_ratios++;
+
+			    if ((debug == 3) &&	/* print only when debug is on */
+				(count != old_count) &&	/* print if old vector is different 
+							   from the current one */
+				(d_count == find_max_ratio) &&
+				(p_count <= max_print) &&
+				(ratio > 0.5 * ratio_max)) {
+			      old_count = count;
+			      printf
+				("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				 fname, m, n, ntests, thresh);
+
+			      /* Print test info */
+			      switch (prec) {
+			      case blas_prec_single:
+				printf("single ");
+				break;
+			      case blas_prec_double:
+				printf("double ");
+				break;
+			      case blas_prec_indigenous:
+				printf("indigenous ");
+				break;
+			      case blas_prec_extra:
+				printf("extra ");
+				break;
+			      }
+			      switch (norm) {
+			      case -1:
+				printf("near_underflow ");
+				break;
+			      case 0:
+				printf("near_one ");
+				break;
+			      case 1:
+				printf("near_overflow ");
+				break;
+			      }
+			      switch (order_type) {
+			      case blas_rowmajor:
+				printf("row_major ");
+				break;
+			      case blas_colmajor:
+				printf("col_major ");
+				break;
+			      }
+			      switch (trans_type) {
+			      case blas_no_trans:
+				printf("no_trans ");
+				break;
+			      case blas_trans:
+				printf("trans ");
+				break;
+			      case blas_conj_trans:
+				printf("conj_trans ");
+				break;
+			      }
+
+			      printf
+				("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				 ku, kl, lda, incx, incy);
+
+			      ix = 0;
+			      iy = 0;
+			      if (incx < 0)
+				ix = -(n_i - 1) * incx;
+			      if (incy < 0)
+				iy = -(m_i - 1) * incy;
+
+			      printf("      A=");
+			      for (j = 0; j < m_i; j++) {
+				/* copy row j of A to temp */
+				dgbmv_copy(order_type, trans_type, m, n, kl,
+					   ku, AB, lda, temp, j);
+
+				if (j > 0)
+				  printf("        ");
+				dprint_vector(temp, n_i, 1, NULL);
+			      }
+
+			      for (j = 0, k = 0; j < n_i || k < m_i; j++, k++) {
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%24.16e", x_head[ix]);
+				  printf("\n");
+				}
+				if (j < n_i) {
+				  printf("      ");
+				  printf("%24.16e", x_tail[ix]);
+				  printf("\n");
+				}
+				if (k < m_i) {
+				  printf("      ");
+				  printf("(%24.16e, %24.16e)",
+					 y_gen[k * incy_gen],
+					 y_gen[k * incy_gen + 1]);
+				  printf("\n");
+				  printf("      ");
+				  printf("y_final[%d] = ", iy);
+				  printf("(%24.16e, %24.16e)", y[iy],
+					 y[iy + 1]);
+				  printf("\n");
+				}
+				ix += incx;
+				iy += incy;
+			      }
+
+			      printf("      ");
+			      printf("alpha = ");
+			      printf("(%24.16e, %24.16e)", alpha[0],
+				     alpha[1]);
+			      printf("\n      ");
+			      printf("beta = ");
+			      printf("(%24.16e, %24.16e)", beta[0], beta[1]);
+			      printf("\n");
+			      for (j = 0; j < m_i; j++) {
+				printf("      ");
+				printf
+				  ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				   head_r_true[j * incy_gen],
+				   tail_r_true[j * incy_gen],
+				   head_r_true[j * incy_gen + 1],
+				   tail_r_true[j * incy_gen + 1]);
+				printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+			      }
+
+			      printf("      ratio=%.4e\n", ratio);
+			      p_count++;
+			    }
+			    if (bad_ratios >= MAX_BAD_TESTS) {
+			      printf("\ntoo many failures, exiting....");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			    if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+			      printf("\nFlagrant ratio error, exiting...");
+			      printf("\nTesting and compilation");
+			      printf(" are incomplete\n\n");
+			      goto end;
+			    }
+			  }
+			  if (d_count == 0) {
+			    if (ratio > ratio_max)
+			      ratio_max = ratio;
+
+			    if (ratio != 0.0 && ratio < ratio_min)
+			      ratio_min = ratio;
+			    tot_tests++;
+			  }
+			}	/* incy */
+		      }		/* incx */
+		    }		/* lda */
+		  }		/* kl */
+		}		/* ku */
+	      }			/* trans */
+	    }			/* order */
+	  }			/* tests */
+	}			/* norm */
+
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_sgbmv2_x(int m, int n, int ntests, int *seed, double thresh,
+			int debug, float test_prob, double *min_ratio,
+			int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_sgbmv2_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  float alpha;
+  float beta;
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  float *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  float *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_sgbmv2_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_sgbmv2_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha = 1.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta = 1.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_S);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
+	    prec = blas_prec_single;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_sgbmv2_testgen(norm, order_type, trans_type, m,
+					      n, kl, ku, &alpha, alpha_flag,
+					      AB, lda, x_head_gen, x_tail_gen,
+					      &beta, beta_flag, y_gen, seed,
+					      head_r_true, tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+
+
+			    scopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_sgbmv2_x */
+			    FPU_FIX_STOP;
+			    BLAS_sgbmv2_x(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_sdot2(n_i, blas_no_conj, alpha, beta,
+					      y_gen[j * incy_gen],
+					      y[iy],
+					      head_r_true[j * incy_gen],
+					      tail_r_true[j * incy_gen], temp,
+					      1, x_head, x_tail, incx_val,
+					      eps_int, un_int, &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  sgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  sprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("%16.8e", y_gen[k * incy_gen]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("%16.8e", y[iy]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("%16.8e", alpha);
+				printf("\n      ");
+				printf("beta = ");
+				printf("%16.8e", beta);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf("[%24.16e, %24.16e]",
+					 head_r_true[j * incy_gen],
+					 tail_r_true[j * incy_gen]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_dgbmv2_x(int m, int n, int ntests, int *seed, double thresh,
+			int debug, float test_prob, double *min_ratio,
+			int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_dgbmv2_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha;
+  double beta;
+  double *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_dgbmv2_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_dgbmv2_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha = 1.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta = 1.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_dgbmv2_testgen(norm, order_type, trans_type, m,
+					      n, kl, ku, &alpha, alpha_flag,
+					      AB, lda, x_head_gen, x_tail_gen,
+					      &beta, beta_flag, y_gen, seed,
+					      head_r_true, tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  dcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  dcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+
+
+			    dcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_dgbmv2_x */
+			    FPU_FIX_STOP;
+			    BLAS_dgbmv2_x(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      dgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_ddot2(n_i, blas_no_conj, alpha, beta,
+					      y_gen[j * incy_gen],
+					      y[iy],
+					      head_r_true[j * incy_gen],
+					      tail_r_true[j * incy_gen], temp,
+					      1, x_head, x_tail, incx_val,
+					      eps_int, un_int, &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  dgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  dprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%24.16e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%24.16e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("%24.16e", y_gen[k * incy_gen]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("%24.16e", y[iy]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("%24.16e", alpha);
+				printf("\n      ");
+				printf("beta = ");
+				printf("%24.16e", beta);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf("[%24.16e, %24.16e]",
+					 head_r_true[j * incy_gen],
+					 tail_r_true[j * incy_gen]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_cgbmv2_x(int m, int n, int ntests, int *seed, double thresh,
+			int debug, float test_prob, double *min_ratio,
+			int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_cgbmv2_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  float alpha[2];
+  float beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  float *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  float *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_cgbmv2_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_cgbmv2_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
+			  2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_S);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
+	    prec = blas_prec_single;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_cgbmv2_testgen(norm, order_type, trans_type, m,
+					      n, kl, ku, &alpha, alpha_flag,
+					      AB, lda, x_head_gen, x_tail_gen,
+					      &beta, beta_flag, y_gen, seed,
+					      head_r_true, tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+			  incx *= 2;
+
+			  ccopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  ccopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    ccopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_cgbmv2_x */
+			    FPU_FIX_STOP;
+			    BLAS_cgbmv2_x(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      cgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_cdot2(n_i, blas_no_conj, alpha, beta,
+					      &y_gen[j * incy_gen],
+					      &y[iy],
+					      &head_r_true[j * incy_gen],
+					      &tail_r_true[j * incy_gen],
+					      temp, 1, x_head, x_tail,
+					      incx_val, eps_int, un_int,
+					      &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  cgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  cprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)", x_head[ix],
+					   x_head[ix + 1]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)", x_tail[ix],
+					   x_tail[ix + 1]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%16.8e, %16.8e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%16.8e, %16.8e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%16.8e, %16.8e)", beta[0], beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_x(int m, int n, int ntests, int *seed, double thresh,
+			int debug, float test_prob, double *min_ratio,
+			int *num_bad_ratio, int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
+			   2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_zgbmv2_testgen(norm, order_type, trans_type, m,
+					      n, kl, ku, &alpha, alpha_flag,
+					      AB, lda, x_head_gen, x_tail_gen,
+					      &beta, beta_flag, y_gen, seed,
+					      head_r_true, tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+			  incx *= 2;
+
+			  zcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  zcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_zgbmv2_x */
+			    FPU_FIX_STOP;
+			    BLAS_zgbmv2_x(order_type, trans_type, m, n, kl,
+					  ku, alpha, AB, lda, x_head, x_tail,
+					  incx_val, beta, y, incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      zgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_zdot2(n_i, blas_no_conj, alpha, beta,
+					      &y_gen[j * incy_gen],
+					      &y[iy],
+					      &head_r_true[j * incy_gen],
+					      &tail_r_true[j * incy_gen],
+					      temp, 1, x_head, x_tail,
+					      incx_val, eps_int, un_int,
+					      &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  zgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  zprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)", x_head[ix],
+					   x_head[ix + 1]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)", x_tail[ix],
+					   x_tail[ix + 1]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%24.16e, %24.16e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%24.16e, %24.16e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%24.16e, %24.16e)", beta[0],
+				       beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_dgbmv2_d_s_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_dgbmv2_d_s_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha;
+  double beta;
+  double *AB;
+  float *x_head;
+  float *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_dgbmv2_d_s_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_dgbmv2_d_s_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha = 1.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta = 1.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_dgbmv2_d_s_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+
+
+			    dcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_dgbmv2_d_s_x */
+			    FPU_FIX_STOP;
+			    BLAS_dgbmv2_d_s_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      dgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_ddot2_d_s(n_i, blas_no_conj, alpha,
+						  beta, y_gen[j * incy_gen],
+						  y[iy],
+						  head_r_true[j * incy_gen],
+						  tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  dgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  dprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("%24.16e", y_gen[k * incy_gen]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("%24.16e", y[iy]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("%24.16e", alpha);
+				printf("\n      ");
+				printf("beta = ");
+				printf("%24.16e", beta);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf("[%24.16e, %24.16e]",
+					 head_r_true[j * incy_gen],
+					 tail_r_true[j * incy_gen]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_dgbmv2_s_d_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_dgbmv2_s_d_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha;
+  double beta;
+  float *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_dgbmv2_s_d_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_dgbmv2_s_d_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha = 1.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta = 1.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_dgbmv2_s_d_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  dcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  dcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+
+
+			    dcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_dgbmv2_s_d_x */
+			    FPU_FIX_STOP;
+			    BLAS_dgbmv2_s_d_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_ddot2_s_d(n_i, blas_no_conj, alpha,
+						  beta, y_gen[j * incy_gen],
+						  y[iy],
+						  head_r_true[j * incy_gen],
+						  tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  sgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  sprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%24.16e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%24.16e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("%24.16e", y_gen[k * incy_gen]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("%24.16e", y[iy]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("%24.16e", alpha);
+				printf("\n      ");
+				printf("beta = ");
+				printf("%24.16e", beta);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf("[%24.16e, %24.16e]",
+					 head_r_true[j * incy_gen],
+					 tail_r_true[j * incy_gen]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_dgbmv2_s_s_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_dgbmv2_s_s_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha;
+  double beta;
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  double *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_dgbmv2_s_s_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_dgbmv2_s_s_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha = 1.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta = 1.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_dgbmv2_s_s_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+
+
+			    dcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_dgbmv2_s_s_x */
+			    FPU_FIX_STOP;
+			    BLAS_dgbmv2_s_s_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_ddot2_s_s(n_i, blas_no_conj, alpha,
+						  beta, y_gen[j * incy_gen],
+						  y[iy],
+						  head_r_true[j * incy_gen],
+						  tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  sgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  sprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("%24.16e", y_gen[k * incy_gen]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("%24.16e", y[iy]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("%24.16e", alpha);
+				printf("\n      ");
+				printf("beta = ");
+				printf("%24.16e", beta);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf("[%24.16e, %24.16e]",
+					 head_r_true[j * incy_gen],
+					 tail_r_true[j * incy_gen]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_z_c_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_z_c_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  float *x_head;
+  float *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_z_c_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_z_c_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
+			   2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_zgbmv2_z_c_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+			  incx *= 2;
+
+			  ccopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  ccopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_zgbmv2_z_c_x */
+			    FPU_FIX_STOP;
+			    BLAS_zgbmv2_z_c_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      zgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_zdot2_z_c(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  zgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  zprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)", x_head[ix],
+					   x_head[ix + 1]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)", x_tail[ix],
+					   x_tail[ix + 1]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%24.16e, %24.16e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%24.16e, %24.16e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%24.16e, %24.16e)", beta[0],
+				       beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_c_z_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_c_z_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  float *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_c_z_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_c_z_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
+			  2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_zgbmv2_c_z_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+			  incx *= 2;
+
+			  zcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  zcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_zgbmv2_c_z_x */
+			    FPU_FIX_STOP;
+			    BLAS_zgbmv2_c_z_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      cgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_zdot2_c_z(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  cgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  cprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)", x_head[ix],
+					   x_head[ix + 1]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)", x_tail[ix],
+					   x_tail[ix + 1]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%24.16e, %24.16e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%24.16e, %24.16e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%24.16e, %24.16e)", beta[0],
+				       beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_c_c_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_c_c_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  double *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_c_c_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_c_c_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
+			  2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_zgbmv2_c_c_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+			  incx *= 2;
+
+			  ccopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  ccopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_zgbmv2_c_c_x */
+			    FPU_FIX_STOP;
+			    BLAS_zgbmv2_c_c_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      cgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_zdot2_c_c(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  cgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  cprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)", x_head[ix],
+					   x_head[ix + 1]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)", x_tail[ix],
+					   x_tail[ix + 1]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%24.16e, %24.16e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%24.16e, %24.16e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%24.16e, %24.16e)", beta[0],
+				       beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_cgbmv2_c_s_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_cgbmv2_c_s_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  float alpha[2];
+  float beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  float *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  float *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_cgbmv2_c_s_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_cgbmv2_c_s_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
+			  2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_S);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
+	    prec = blas_prec_single;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_cgbmv2_c_s_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    ccopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_cgbmv2_c_s_x */
+			    FPU_FIX_STOP;
+			    BLAS_cgbmv2_c_s_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      cgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_cdot2_c_s(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  cgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  cprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%16.8e, %16.8e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%16.8e, %16.8e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%16.8e, %16.8e)", beta[0], beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_cgbmv2_s_c_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_cgbmv2_s_c_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  float alpha[2];
+  float beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  float *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  float *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_cgbmv2_s_c_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_cgbmv2_s_c_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_S);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
+	    prec = blas_prec_single;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_cgbmv2_s_c_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+			  incx *= 2;
+
+			  ccopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  ccopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    ccopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_cgbmv2_s_c_x */
+			    FPU_FIX_STOP;
+			    BLAS_cgbmv2_s_c_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_cdot2_s_c(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  sgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  sprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)", x_head[ix],
+					   x_head[ix + 1]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)", x_tail[ix],
+					   x_tail[ix + 1]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%16.8e, %16.8e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%16.8e, %16.8e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%16.8e, %16.8e)", beta[0], beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_cgbmv2_s_s_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_cgbmv2_s_s_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  float alpha[2];
+  float beta[2];
+  float *AB;
+  float *x_head;
+  float *x_tail;
+  float *y;
+  float *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  float *x_head_gen;
+  float *x_tail_gen;
+  float *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_cgbmv2_s_s_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_cgbmv2_s_s_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (float *) blas_malloc(max_mn * 2 * sizeof(float));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (float *) blas_malloc(max_mn * sizeof(float));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_S);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
+	    prec = blas_prec_single;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_cgbmv2_s_s_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  scopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  scopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    ccopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_cgbmv2_s_s_x */
+			    FPU_FIX_STOP;
+			    BLAS_cgbmv2_s_s_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      sgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_cdot2_s_s(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  sgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  sprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%16.8e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%16.8e, %16.8e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%16.8e, %16.8e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%16.8e, %16.8e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%16.8e, %16.8e)", beta[0], beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_z_d_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_z_d_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_z_d_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_z_d_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
+			   2);
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_zgbmv2_z_d_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  dcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  dcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_zgbmv2_z_d_x */
+			    FPU_FIX_STOP;
+			    BLAS_zgbmv2_z_d_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      zgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_zdot2_z_d(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  zgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  zprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%24.16e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%24.16e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%24.16e, %24.16e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%24.16e, %24.16e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%24.16e, %24.16e)", beta[0],
+				       beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_d_z_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_d_z_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_d_z_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_d_z_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+  incx_gen *= 2;
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_zgbmv2_d_z_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+			  incx *= 2;
+
+			  zcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  zcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_zgbmv2_d_z_x */
+			    FPU_FIX_STOP;
+			    BLAS_zgbmv2_d_z_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      dgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_zdot2_d_z(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  dgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  dprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)", x_head[ix],
+					   x_head[ix + 1]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)", x_tail[ix],
+					   x_tail[ix + 1]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%24.16e, %24.16e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%24.16e, %24.16e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%24.16e, %24.16e)", beta[0],
+				       beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+double do_test_zgbmv2_d_d_x(int m, int n, int ntests, int *seed,
+			    double thresh, int debug, float test_prob,
+			    double *min_ratio, int *num_bad_ratio,
+			    int *num_tests)
+
+/*
+ * Purpose  
+ * =======
+ *
+ * Runs a series of tests on GBMV2.
+ *
+ * Arguments
+ * =========
+ *  
+ * m         (input) int
+ *           The number of rows
+ *
+ * n         (input) int
+ *           The number of columns
+ *
+ * ntests    (input) int
+ *           The number of tests to run for each set of attributes.
+ *
+ * seed      (input/output) int         
+ *           The seed for the random number generator used in testgen().
+ *
+ * thresh    (input) double
+ *           When the ratio returned from test() exceeds the specified
+ *           threshold, the current size, r_true, r_comp, and ratio will be
+ *           printed.  (Since ratio is supposed to be O(1), we can set thresh
+ *           to ~10.)
+ *
+ * debug     (input) int
+ *           If debug=3, print summary 
+ *           If debug=2, print summary only if the number of bad ratios > 0
+ *           If debug=1, print complete info if tests fail
+ *           If debug=0, return max ratio
+ *
+ * test_prob (input) float
+ *           The specified test will be performed only if the generated 
+ *           random exceeds this threshold.
+ *
+ * min_ratio (output) double
+ *           The minimum ratio
+ * 
+ * num_bad_ratio (output) int
+ *               The number of tests fail; they are above the threshold.
+ *
+ * num_tests (output) int
+ *           The number of tests is being performed.
+ *
+ * Return value
+ * ============
+ *
+ * The maximum ratio if run successfully, otherwise return -1 
+ *
+ * Code structure
+ * ==============
+ * 
+ *  debug loop  -- if debug is one, the first loop computes the max ratio
+ *              -- and the last(second) loop outputs debugging information,
+ *              -- if the test fail and its ratio > 0.5 * max ratio.
+ *              -- if debug is zero, the loop is executed once
+ *    alpha loop  -- varying alpha: 0, 1, or random
+ *      beta loop   -- varying beta: 0, 1, or random
+   *        prec loop   -- varying internal prec: single, double, or extra
+ *          norm loop   -- varying norm: near undeflow, near one, or 
+ *                        -- near overflow
+ *            numtest loop  -- how many times the test is perform with 
+ *                            -- above set of attributes
+ *              order loop   -- varying order type: rowmajor or colmajor
+ *                trans loop    -- varying trans type: no trans, trans, or conj trans
+ *                  ku loop       -- varying ku: 0 to n-1
+ *                    kl loop       -- varying kl: 0 to m-1
+ *                      lda loop      -- varying lda: ku+kl+1, ku+kl+2, 2*(ku+kl+1) 
+ *                        incx loop     -- varying incx: -2, -1, 1, 2
+ *                          incy loop     -- varying incy: -2, -1, 1, 2
+ */
+{
+  /* function name */
+  const char fname[] = "BLAS_zgbmv2_d_d_x";
+
+  /* max number of debug lines to print */
+  const int max_print = 8;
+
+  /* Variables in the "x_val" form are loop vars for corresponding
+     variables */
+  int i;			/* iterate through the repeating tests */
+  int j;
+  int k;			/* multipurpose counters or variables */
+  int ix, iy;			/* use to index x and y respectively */
+  int incx_val, incy_val,	/* for testing different inc values */
+    incx, incy;
+  int incx_gen, incy_gen;	/* for complex case inc=2, for real case inc=1 */
+  int d_count;			/* counter for debug */
+  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
+  int p_count;			/* counter for the number of debug lines printed */
+  int tot_tests;		/* total number of tests to be done */
+  int norm;			/* input values of near underflow/one/overflow */
+  double ratio_max;		/* the current maximum ratio */
+  double ratio_min;		/* the current minimum ratio */
+  double *ratios;		/* a temporary variable for calculating ratio */
+  double ratio;			/* the per-use test ratio from test() */
+  int bad_ratios = 0;		/* the number of ratios over the threshold */
+  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
+  double un_int;		/* the internal underflow threshold */
+  double alpha[2];
+  double beta[2];
+  double *AB;
+  double *x_head;
+  double *x_tail;
+  double *y;
+  double *temp;			/* use for calculating ratio */
+
+  /* x_gen and y_gen are used to store vectors generated by testgen.
+     they eventually are copied back to x and y */
+  double *x_head_gen;
+  double *x_tail_gen;
+  double *y_gen;
+
+  /* the true r calculated by testgen(), in double-double */
+  double *head_r_true, *tail_r_true;
+
+  int alpha_val;
+  int alpha_flag = 0;		/* input flag for BLAS_zgbmv2_d_d_testgen */
+  int beta_val;
+  int beta_flag = 0;		/* input flag for BLAS_zgbmv2_d_d_testgen */
+  int order_val;
+  enum blas_order_type order_type = 0;
+  int prec_val;
+  enum blas_prec_type prec = 0;
+  int trans_val;
+  enum blas_trans_type trans_type = 0;
+  int m_i = 0;
+  int n_i = 0;
+  int max_mn;			/* the max of m and n */
+  int ku;
+  int kl;
+  int lda_val;
+  int lda = 0;
+  int saved_seed;		/* for saving the original seed */
+
+  /* use for counting the number of testgen calls * 2 */
+  int count, old_count = -1;
+
+  FPU_FIX_DECL;
+
+  /* test for bad arguments */
+  if (n < 0 || m < 0 || ntests < 0)
+    BLAS_error(fname, 0, 0, NULL);
+
+  /* initialization */
+  *min_ratio = 0.0;
+  *num_bad_ratio = 0;
+  *num_tests = 0;
+
+  saved_seed = *seed;
+  ratio_min = 1e308;
+  ratio_max = 0.0;
+  ratio = 0.0;
+  tot_tests = 0;
+  p_count = 0;
+  count = 0;
+  find_max_ratio = 0;
+  if (debug == 3)
+    find_max_ratio = 1;
+  max_mn = MAX(m, n);
+
+  if (m == 0 || n == 0) {
+    return 0.0;
+  }
+
+  FPU_FIX_START;
+
+  incx_gen = incy_gen = 1;
+
+  incy_gen *= 2;
+
+  /* get space for calculation */
+  x_head = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_head == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail = (double *) blas_malloc(max_mn * 2 * sizeof(double));
+  if (max_mn * 2 > 0 && x_tail == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
+  if (max_mn * 2 > 0 && y == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_head_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_head_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  x_tail_gen = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && x_tail_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && y_gen == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  temp = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && temp == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
+  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+  ratios = (double *) blas_malloc(max_mn * sizeof(double));
+  if (max_mn > 0 && ratios == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  AB =
+    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
+  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && AB == NULL) {
+    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
+  }
+
+  /* The debug iteration:
+     If debug=1, then will execute the iteration twice. First, compute the
+     max ratio. Second, print info if ratio > (50% * ratio_max). */
+  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
+    bad_ratios = 0;		/* set to zero */
+
+    if ((debug == 3) && (d_count == find_max_ratio))
+      *seed = saved_seed;	/* restore the original seed */
+
+    /* varying alpha */
+    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
+      alpha_flag = 0;
+      switch (alpha_val) {
+      case 0:
+	alpha[0] = alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      case 1:
+	alpha[0] = 1.0;
+	alpha[1] = 0.0;
+	alpha_flag = 1;
+	break;
+      }
+
+      /* varying beta */
+      for (beta_val = 0; beta_val < 3; beta_val++) {
+	beta_flag = 0;
+	switch (beta_val) {
+	case 0:
+	  beta[0] = beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	case 1:
+	  beta[0] = 1.0;
+	  beta[1] = 0.0;
+	  beta_flag = 1;
+	  break;
+	}
+
+
+	/* varying extra precs */
+	for (prec_val = 0; prec_val <= 2; prec_val++) {
+	  switch (prec_val) {
+	  case 0:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 1:
+	    eps_int = power(2, -BITS_D);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
+	    prec = blas_prec_double;
+	    break;
+	  case 2:
+	  default:
+	    eps_int = power(2, -BITS_E);
+	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
+			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
+	    prec = blas_prec_extra;
+	    break;
+	  }
+
+	  /* values near underflow, 1, or overflow */
+	  for (norm = -1; norm <= 1; norm++) {
+
+	    /* number of tests */
+	    for (i = 0; i < ntests; i++) {
+
+	      /* row or col major */
+	      for (order_val = 0; order_val < 2; order_val++) {
+		switch (order_val) {
+		case 0:
+		  order_type = blas_rowmajor;
+		  break;
+		case 1:
+		  order_type = blas_colmajor;
+		  break;
+		}
+
+		/* no_trans, trans, or conj_trans */
+		for (trans_val = 0; trans_val < 3; trans_val++) {
+		  switch (trans_val) {
+		  case 0:
+		    trans_type = blas_no_trans;
+		    m_i = m;
+		    n_i = n;
+		    break;
+		  case 1:
+		    trans_type = blas_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  case 2:
+		    trans_type = blas_conj_trans;
+		    m_i = n;
+		    n_i = m;
+		    break;
+		  }
+
+		  /* ku from 0 to n-1 */
+		  for (ku = 0; ku < n; ku++) {
+		    if (ku == n && ku != 0)
+		      continue;	/* the purpose of doing this is 
+				   to test for ku=0 */
+
+		    /* kl from 0 to m-1 */
+		    for (kl = 0; kl < m; kl++) {
+		      if (kl == n && kl != 0)
+			continue;	/* the purpose of doing this is 
+					   to test for kl=0 */
+
+		      /* lda=ku+kl+1, ku+kl+2, 2*(ku+kl+1) */
+		      for (lda_val = 0; lda_val < 3; lda_val++) {
+			switch (lda_val) {
+			case 0:
+			  lda = ku + kl + 1;
+			  break;
+			case 1:
+			  lda = ku + kl + 2;
+			  break;
+			case 2:
+			  lda = 2 * (ku + kl + 1);
+			  break;
+			}
+
+			if ((order_type == blas_rowmajor && lda < n) ||
+			    (order_type == blas_colmajor && lda < m))
+			  continue;
+
+			/* For the sake of speed, we throw out this case at random */
+			if (xrand(seed) >= test_prob)
+			  continue;
+
+			/* in the trivial cases, no need to run testgen */
+			if (m > 0 && n > 0)
+			  BLAS_zgbmv2_d_d_testgen(norm, order_type,
+						  trans_type, m, n, kl, ku,
+						  &alpha, alpha_flag, AB, lda,
+						  x_head_gen, x_tail_gen,
+						  &beta, beta_flag, y_gen,
+						  seed, head_r_true,
+						  tail_r_true);
+			count++;
+
+			/* varying incx */
+			for (incx_val = -2; incx_val <= 2; incx_val++) {
+			  if (incx_val == 0)
+			    continue;
+
+			  /* setting incx */
+			  incx = incx_val;
+
+
+			  dcopy_vector(x_head_gen, n_i, 1, x_head, incx_val);
+			  dcopy_vector(x_tail_gen, n_i, 1, x_tail, incx_val);
+
+			  /* varying incy */
+			  for (incy_val = -2; incy_val <= 2; incy_val++) {
+			    if (incy_val == 0)
+			      continue;
+
+			    /* setting incy */
+			    incy = incy_val;
+			    incy *= 2;
+
+			    zcopy_vector(y_gen, m_i, 1, y, incy_val);
+
+			    /* call BLAS_zgbmv2_d_d_x */
+			    FPU_FIX_STOP;
+			    BLAS_zgbmv2_d_d_x(order_type, trans_type, m, n,
+					      kl, ku, alpha, AB, lda, x_head,
+					      x_tail, incx_val, beta, y,
+					      incy_val, prec);
+			    FPU_FIX_START;
+
+			    /* set y starting index */
+			    iy = 0;
+			    if (incy < 0)
+			      iy = -(m_i - 1) * incy;
+
+			    /* computing the ratio */
+			    for (j = 0; j < m_i; j++) {
+			      /* copy row j of AB to temp */
+			      dgbmv_copy(order_type, trans_type, m, n, kl, ku,
+					 AB, lda, temp, j);
+
+			      test_BLAS_zdot2_d_d(n_i, blas_no_conj, alpha,
+						  beta, &y_gen[j * incy_gen],
+						  &y[iy],
+						  &head_r_true[j * incy_gen],
+						  &tail_r_true[j * incy_gen],
+						  temp, 1, x_head, x_tail,
+						  incx_val, eps_int, un_int,
+						  &ratios[j]);
+
+			      /* take the max ratio */
+			      if (j == 0) {
+				ratio = ratios[0];
+				/* The !<= below causes NaN error to be detected.
+				   Note that (NaN > thresh) is always false. */
+			      } else if (!(ratios[j] <= ratio)) {
+				ratio = ratios[j];
+			      }
+
+			      iy += incy;
+			    }
+
+			    /* Increase the number of bad ratio, if the ratio
+			       is bigger than the threshold.
+			       The !<= below causes NaN error to be detected.
+			       Note that (NaN > thresh) is always false. */
+			    if (!(ratio <= thresh)) {
+			      bad_ratios++;
+
+			      if ((debug == 3) &&	/* print only when debug is on */
+				  (count != old_count) &&	/* print if old vector is different 
+								   from the current one */
+				  (d_count == find_max_ratio) &&
+				  (p_count <= max_print) &&
+				  (ratio > 0.5 * ratio_max)) {
+				old_count = count;
+				printf
+				  ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
+				   fname, m, n, ntests, thresh);
+
+				/* Print test info */
+				switch (prec) {
+				case blas_prec_single:
+				  printf("single ");
+				  break;
+				case blas_prec_double:
+				  printf("double ");
+				  break;
+				case blas_prec_indigenous:
+				  printf("indigenous ");
+				  break;
+				case blas_prec_extra:
+				  printf("extra ");
+				  break;
+				}
+				switch (norm) {
+				case -1:
+				  printf("near_underflow ");
+				  break;
+				case 0:
+				  printf("near_one ");
+				  break;
+				case 1:
+				  printf("near_overflow ");
+				  break;
+				}
+				switch (order_type) {
+				case blas_rowmajor:
+				  printf("row_major ");
+				  break;
+				case blas_colmajor:
+				  printf("col_major ");
+				  break;
+				}
+				switch (trans_type) {
+				case blas_no_trans:
+				  printf("no_trans ");
+				  break;
+				case blas_trans:
+				  printf("trans ");
+				  break;
+				case blas_conj_trans:
+				  printf("conj_trans ");
+				  break;
+				}
+
+				printf
+				  ("ku=%d, kl=%d, lda=%d, incx=%d, incy=%d:\n",
+				   ku, kl, lda, incx, incy);
+
+				ix = 0;
+				iy = 0;
+				if (incx < 0)
+				  ix = -(n_i - 1) * incx;
+				if (incy < 0)
+				  iy = -(m_i - 1) * incy;
+
+				printf("      A=");
+				for (j = 0; j < m_i; j++) {
+				  /* copy row j of A to temp */
+				  dgbmv_copy(order_type, trans_type, m, n, kl,
+					     ku, AB, lda, temp, j);
+
+				  if (j > 0)
+				    printf("        ");
+				  dprint_vector(temp, n_i, 1, NULL);
+				}
+
+				for (j = 0, k = 0; j < n_i || k < m_i;
+				     j++, k++) {
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%24.16e", x_head[ix]);
+				    printf("\n");
+				  }
+				  if (j < n_i) {
+				    printf("      ");
+				    printf("%24.16e", x_tail[ix]);
+				    printf("\n");
+				  }
+				  if (k < m_i) {
+				    printf("      ");
+				    printf("(%24.16e, %24.16e)",
+					   y_gen[k * incy_gen],
+					   y_gen[k * incy_gen + 1]);
+				    printf("\n");
+				    printf("      ");
+				    printf("y_final[%d] = ", iy);
+				    printf("(%24.16e, %24.16e)", y[iy],
+					   y[iy + 1]);
+				    printf("\n");
+				  }
+				  ix += incx;
+				  iy += incy;
+				}
+
+				printf("      ");
+				printf("alpha = ");
+				printf("(%24.16e, %24.16e)", alpha[0],
+				       alpha[1]);
+				printf("\n      ");
+				printf("beta = ");
+				printf("(%24.16e, %24.16e)", beta[0],
+				       beta[1]);
+				printf("\n");
+				for (j = 0; j < m_i; j++) {
+				  printf("      ");
+				  printf
+				    ("([%24.16e  %24.16e], [%24.16e %24.16e])",
+				     head_r_true[j * incy_gen],
+				     tail_r_true[j * incy_gen],
+				     head_r_true[j * incy_gen + 1],
+				     tail_r_true[j * incy_gen + 1]);
+				  printf(", ratio[%d]=%.4e\n", j, ratios[j]);
+				}
+
+				printf("      ratio=%.4e\n", ratio);
+				p_count++;
+			      }
+			      if (bad_ratios >= MAX_BAD_TESTS) {
+				printf("\ntoo many failures, exiting....");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			      if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
+				printf("\nFlagrant ratio error, exiting...");
+				printf("\nTesting and compilation");
+				printf(" are incomplete\n\n");
+				goto end;
+			      }
+			    }
+			    if (d_count == 0) {
+			      if (ratio > ratio_max)
+				ratio_max = ratio;
+
+			      if (ratio != 0.0 && ratio < ratio_min)
+				ratio_min = ratio;
+			      tot_tests++;
+			    }
+			  }	/* incy */
+			}	/* incx */
+		      }		/* lda */
+		    }		/* kl */
+		  }		/* ku */
+		}		/* trans */
+	      }			/* order */
+	    }			/* tests */
+	  }			/* norm */
+	}			/* prec */
+      }				/* beta */
+    }				/* alpha */
+  }				/* debug */
+
+  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
+    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n", fname,
+	   m, n, ntests, thresh);
+    printf
+      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
+       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
+       ratio_min, ratio_max);
+  }
+
+end:
+  blas_free(x_head);
+  blas_free(x_tail);
+  blas_free(y);
+  blas_free(x_head_gen);
+  blas_free(x_tail_gen);
+  blas_free(y_gen);
+  blas_free(temp);
+  blas_free(AB);
+  blas_free(head_r_true);
+  blas_free(tail_r_true);
+  blas_free(ratios);
+
+  FPU_FIX_STOP;
+
+  *min_ratio = ratio_min;
+  *num_bad_ratio = bad_ratios;
+  *num_tests = tot_tests;
+  return ratio_max;
+}
+
+#define NUMPAIRS 12
+
+int main(int argc, char **argv)
+{
+  int nsizes, ntests, debug;
+  double thresh, test_prob;
+  double total_min_ratio, total_max_ratio;
+  int total_bad_ratios;
+  int seed, num_bad_ratio, num_tests;
+  int total_tests, nr_failed_routines = 0, nr_routines = 0;
+  double min_ratio, max_ratio;
+  const char *base_routine = "gbmv2";
+  char *fname;
+  int n;
+
+  int m, i;
+  int mn_pairs[NUMPAIRS][2] =
+    { {0, 0}, {1, 0}, {0, 1}, {1, 2}, {2, 1}, {1, 3},
+  {3, 1}, {2, 3}, {4, 3}, {2, 4}, {6, 6}, {10, 8}
+  };
+
+  if (argc != 6) {
+    printf("Usage:\n");
+    printf("do_test_gbmv2 <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
+    printf("   <nsizes>: number of sizes to be run.\n");
+    printf
+      ("   <ntests>: the number of tests performed for each set of attributes\n");
+    printf
+      ("   <thresh>: to catch bad ratios if it is greater than <thresh>\n");
+    printf("    <debug>: 0, 1, 2, or 3; \n");
+    printf("        if 0, no printing \n");
+    printf("        if 1, print error summary only if tests fail\n");
+    printf("        if 2, print error summary for each n\n");
+    printf("        if 3, print complete info each test fails \n");
+    printf("<test_prob>: probability of preforming a given \n");
+    printf("           test case: 0.0 does no tests, 1.0 does all tests\n");
+    return -1;
+  } else {
+    nsizes = atoi(argv[1]);
+    ntests = atoi(argv[2]);
+    thresh = atof(argv[3]);
+    debug = atoi(argv[4]);
+    test_prob = atof(argv[5]);
+  }
+
+  seed = 1999;
+
+  if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
+    BLAS_error("Testing gbmv2", 0, 0, NULL);
+
+  printf("Testing %s...\n", base_routine);
+  printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
+	 nsizes, ntests, thresh, debug);
+
+
+
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_dgbmv2_d_s";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_dgbmv2_d_s(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_dgbmv2_s_d";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_dgbmv2_s_d(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_dgbmv2_s_s";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_dgbmv2_s_s(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_z_c";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_z_c(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_c_z";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_c_z(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_c_c";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_c_c(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_cgbmv2_c_s";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_cgbmv2_c_s(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_cgbmv2_s_c";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_cgbmv2_s_c(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_cgbmv2_s_s";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_cgbmv2_s_s(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_z_d";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_z_d(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_d_z";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_d_z(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_d_d";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_d_d(m, n, 1, &seed, thresh, debug, test_prob,
+			 &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_sgbmv2_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_sgbmv2_x(m, n, 1, &seed, thresh, debug, test_prob,
+		       &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_dgbmv2_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_dgbmv2_x(m, n, 1, &seed, thresh, debug, test_prob,
+		       &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_cgbmv2_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_cgbmv2_x(m, n, 1, &seed, thresh, debug, test_prob,
+		       &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_x(m, n, 1, &seed, thresh, debug, test_prob,
+		       &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_dgbmv2_d_s_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_dgbmv2_d_s_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_dgbmv2_s_d_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_dgbmv2_s_d_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_dgbmv2_s_s_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_dgbmv2_s_s_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_z_c_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_z_c_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_c_z_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_c_z_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_c_c_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_c_c_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_cgbmv2_c_s_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_cgbmv2_c_s_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_cgbmv2_s_c_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_cgbmv2_s_c_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_cgbmv2_s_s_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_cgbmv2_s_s_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_z_d_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_z_d_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_d_z_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_d_z_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+  min_ratio = 1e308;
+  max_ratio = 0.0;
+  total_bad_ratios = 0;
+  total_tests = 0;
+  fname = "BLAS_zgbmv2_d_d_x";
+  printf("Testing %s...\n", fname);
+  for (i = 0; i < nsizes; i++) {
+    m = mn_pairs[i][0];
+    n = mn_pairs[i][1];
+    total_max_ratio =
+      do_test_zgbmv2_d_d_x(m, n, 1, &seed, thresh, debug, test_prob,
+			   &total_min_ratio, &num_bad_ratio, &num_tests);
+    if (total_max_ratio > max_ratio)
+      max_ratio = total_max_ratio;
+
+    if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
+      min_ratio = total_min_ratio;
+
+    total_bad_ratios += num_bad_ratio;
+    total_tests += num_tests;
+  }
+
+  if (total_bad_ratios == 0)
+    printf("PASS> ");
+  else {
+    nr_failed_routines++;
+    printf("FAIL> ");
+  }
+  nr_routines++;
+
+  if (min_ratio == 1e308)
+    min_ratio = 0.0;
+
+  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", fname,
+	 total_bad_ratios, total_tests, max_ratio);
+
+
+  printf("\n");
+  if (nr_failed_routines)
+    printf("FAILED ");
+  else
+    printf("PASSED ");
+  printf("%-10s: FAIL/TOTAL = %d/%d\n",
+	 base_routine, nr_failed_routines, nr_routines);
+
+  return 0;
+}