/*
   DGELSS.C
   LAPACK matrix solver using SVD.

   $Id$
 */

#include "dg.h"


/*-----prototypes of functions defined here-----*/
extern void dlabrd( long m, long n, long nb, double a[], long lda,
		   double d[], double e[], double tauq[], double taup[],
		   double x[], long ldx, double y[],long ldy );
extern void dgebd2( long m, long n, double a[], long lda,
		   double d[], double e[], double tauq[], double taup[],
		   double work[], long *info );
extern void dgebrd( long m, long n, double a[], long lda,
		   double d[], double e[], double tauq[], double taup[],
		   double work[], long lwork,long *info );
extern void dormbr( char vect, char side, char trans, long m, long n, long k,
		   double a[], long lda, double tau[], double c[],long ldc,
		   double work[], long lwork, long *info );
extern void dorg2r( long m, long n, long k, double a[], long lda,
		   double tau[], double work[], long *info );
extern void dorgqr( long m, long n, long k, double a[], long lda,
		   double tau[], double work[], long lwork, long *info );
extern void dorgl2( long m, long n, long k, double a[], long lda,
		   double tau[], double work[], long *info );
extern void dorglq( long m, long n, long k, double a[], long lda,
		   double tau[], double work[], long lwork, long *info );
extern void dorgbr( char vect, long m, long n, long k, double a[], long lda,
		   double tau[], double work[], long lwork, long *info );
extern void dlartg( double f, double g, double *cs, double *sn, double *r );
extern void dlasr( char side, char pivot, char direct, long m, long n,
		  double c[], double s[], double a[], long lda );
extern void dlasv2( double f, double g, double h,
		   double *ssmin, double *ssmax, double *snr,
		   double *csr, double *snl, double *csl );
extern void dlas2( double f, double g, double h,
		  double *ssmin, double *ssmax );
extern void dbdsqr( char uplo, long n, long ncvt, long nru, long ncc,
		   double d[], double e[], double vt[], long ldvt,
		   double u[],long ldu, double c[], long ldc,
		   double work[], long *info );
extern void dlacpy( char uplo, long m, long n, double a[], long lda,
		   double b[], long ldb );
extern void dgelss( long m, long n, long nrhs, double a[], long lda,
		   double b[], long ldb, double s[], double rcond,
		   long *rank,double work[], long lwork, long *info );
/*-----end of prototypes-----*/




/*---other lapack routines---*/
extern double   dlange( char norm, long m, long n, double a[], long lda,
		       double work[] );
extern double   dlamch( char cmach );
extern void dlabad( double *small, double *large );
extern long ilaenv( long ispec, char *name, char *opts,
		   long n1, long n2, long n3,long n4 );

extern void drscl( long n, double sa, double sx[], long incx );
extern void dlarft( char direct, char storev, long n, long k,
		   double v[], long ldv, double tau[], double t[], long ldt );
extern void dlarfb( char side, char trans, char direct, char storev,
		   long m, long n, long k, double v[], long ldv,
		   double t[], long ldt, double c[], long ldc,
		   double work[], long ldwork );
extern void dlarfg( long n, double *alpha, double x[], long incx,
		   double *tau );
extern void dlarf( char side, long m, long n, double v[], long incv,
		  double tau, double c[], long ldc, double work[] );
extern void dormqr( char side, char trans, long m, long n, long k,
		   double a[], long lda, double tau[], double c[], long ldc,
		   double work[], long lwork, long *info );
extern void dormlq( char side, char trans, long m, long n, long k,
		   double a[], long lda, double tau[], double c[], long ldc,
		   double work[], long lwork, long *info );
extern void dgelqf( long m, long n, double a[], long lda, double tau[],
		   double work[], long lwork, long *info );
extern void dgeqrf( long m, long n, double a[], long lda, double tau[],
		   double work[], long lwork, long *info );
extern void dlaset( char uplo, long m, long n, double alpha, double beta,
		   double a[], long lda );
extern void dlascl( char type, long kl, long ku, double cfrom, double cto,
		   long m, long n, double a[], long lda, long *info );



/*---blas routines---*/
extern void dgemv ( char trans, long m, long n, double alpha,
		   double a[], long lda, double x[], long incx,
		   double beta, double y[], long incy );
extern void dgemm ( char transa, char transb, long m, long n, long k,
		   double alpha, double a[], long lda, double b[], long ldb,
		   double beta, double c[], long ldc );
extern void  dscal(long n,double da,double dx[],long incx);
extern void  drot (long n,double dx[],long incx,double dy[],long incy,
		   double c,double s);
extern void  dswap (long n,double dx[],long incx,double dy[],long incy);
extern void  dcopy(long n,double dx[],long incx,double dy[],long incy);



/*---converted nutty string switches to single characters (lower case)---*/
#define lsame(x,y) ((x)==(y))

extern void xerbla(char *,long);



/*-----Fortran intrinsics converted-----*/
extern double pow(double,double);  /* used only to take 1/8th power */
#define abs(x) ((x)>=0?(x):-(x))
extern double sqrt(double);
#define dble(x) ((double)x)
#define sign(x,y) ((((x)<0)!=((y)<0))?-(x):(x))
#define min(x,y) ((x)<(y)?(x):(y))
#define max(x,y) ((x)>(y)?(x):(y))
/*-----end of Fortran intrinsics-----*/



void dgelss( long m, long n, long nrhs, double a[], long lda,
	    double b[], long ldb, double s[], double rcond,
	    long *rank,double work[], long lwork, long *info )
{
  /**
   *  -- LAPACK driver routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     March 31, 1993
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef s_1
#define s_1(a1) s[a1-1]
#undef b_2
#define b_2(a1,a2) b[a1-1+ldb*(a2-1)]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DGELSS computes the minimum norm solution to a real linear least
   *  squares problem:
   *
   *  Minimize 2-norm(| b - A*x |).
   *
   *  using the singular value decomposition (SVD) of A. A is an M-by-N
   *  matrix which may be rank-deficient.
   *
   *  Several right hand side vectors b and solution vectors x can be
   *  handled in a single call; they are stored as the columns of the
   *  M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
   *  X.
   *
   *  The effective rank of A is determined by treating as zero those
   *  singular values which are less than RCOND times the largest singular
   *  value.
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix A. M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix A. N >= 0.
   *
   *  NRHS    (input) INTEGER
   *          The number of right hand sides, i.e., the number of columns
   *          of the matrices B and X. NRHS >= 0.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the M-by-N matrix A.
   *          On exit, the first min(m,n) rows of A are overwritten with
   *          its right singular vectors, stored rowwise.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.  LDA >= max(1,M).
   *
   *  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
   *          On entry, the M-by-NRHS right hand side matrix B.
   *          On exit, B is overwritten by the N-by-NRHS solution
   *          matrix X.  If m >= n and RANK = n, the residual
   *          sum-of-squares for the solution in the i-th column is given
   *          by the sum of squares of elements n+1:m in that column.
   *
   *  LDB     (input) INTEGER
   *          The leading dimension of the array B. LDB >= max(1,MAX(M,N)).
   *
   *  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
   *          The singular values of A in decreasing order.
   *          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
   *
   *  RCOND   (input) DOUBLE PRECISION
   *          RCOND is used to determine the effective rank of A.
   *          Singular values S(i) <= RCOND*S(1) are treated as zero.
   *          If RCOND $<$ 0, machine precision is used instead.
   *
   *  RANK    (output) INTEGER
   *          The effective rank of A, i.e., the number of singular values
   *          which are greater than RCOND*S(1).
   *
   *  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
   *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
   *
   *  LWORK   (input) INTEGER
   *          The dimension of the array WORK. LWORK >= 1, and also:
   *          LWORK >= 3*N+MAX(2*N,NRHS,M) if M >= N,
   *          LWORK >= 3*M+MAX(2*M,NRHS,N) if M < N.
   *          For good performance, LWORK should generally be larger.
   *
   *  INFO    (output) INTEGER
   *          = 0:  successful exit
   *          < 0:  if INFO = -i, the i-th argument had an illegal value.
   *          > 0:  the algorithm for computing the SVD failed to converge;
   *                if INFO = i, i off-diagonal elements of an intermediate
   *                bidiagonal form did not converge to zero.
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e0
#undef one
#define one 1.0e0
  /**     ..
   *     .. Local Scalars ..*/
  long    bl, chunk, i, iascl, ibscl, ie, il, itau,
          itaup, itauq, iwork, ldwork, maxmn, maxwrk=0,
          minmn, minwrk, mm, mnthr;
  double    anrm, bignum, bnrm, eps, sfmin, smlnum, thr;
  /**     ..
   *     .. Local Arrays ..*/
  double    vdum[1];
#undef vdum_1
#define vdum_1(a1) [a1-1]
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max, min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input arguments
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  minmn = min( m, n );
  maxmn = max( m, n );
  mnthr = ilaenv( 6, "dgelss", " ", m, n, nrhs, -1 );
  if( m<0 ) {
    *info = -1;
  } else if( n<0 ) {
    *info = -2;
  } else if( nrhs<0 ) {
    *info = -3;
  } else if( lda<max( 1, m ) ) {
    *info = -5;
  } else if( ldb<max( 1, maxmn ) ) {
    *info = -7;
  }
  /**
   *     Compute workspace
   *      (Note: Comments in the code beginning "Workspace:" describe the
   *       minimal amount of workspace needed at that point in the code,
   *       as well as the preferred amount for good performance.
   *       NB refers to the optimal block size for the immediately
   *       following subroutine, as returned by ILAENV.)
   **/
  minwrk = 1;
  if( *info==0 && lwork>=1 ) {
    maxwrk = 0;
    mm = m;
    if( m>=n && m>=mnthr ) {
      /**
       *           Path 1a - overdetermined, with many more rows than columns
       **/
      mm = n;
      maxwrk = max( maxwrk, n+n*ilaenv( 1, "dgeqrf", " ", m, n,
				       -1, -1 ) );
      maxwrk = max( maxwrk, n+nrhs*
		   ilaenv( 1, "dormqr", "lt", m, nrhs, n, -1 ) );
    }
    if( m>=n ) {
      /**
       *           Path 1 - overdetermined or exactly determined
       **/
      maxwrk = max( maxwrk, 3*n+( mm+n )*
		   ilaenv( 1, "dgebrd", " ", mm, n, -1, -1 ) );
      maxwrk = max( maxwrk, 3*n+nrhs*
		   ilaenv( 1, "dormbr", "qlt", mm, nrhs, n, -1 ) );
      maxwrk = max( maxwrk, 3*n+( n-1 )*
		   ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) );
      maxwrk = max( maxwrk, 5*n-4 );
      maxwrk = max( maxwrk, n*nrhs );
      minwrk = max( max(3*n+mm, 3*n+nrhs), 5*n-4 );
    }
    if( n>m ) {
      minwrk = max( max(3*m+nrhs, 3*m+n), 5*m-4 );
      if( n>=mnthr ) {
	/**
	 *              Path 2a - underdetermined, with many more columns
	 *              than rows
	 **/
	maxwrk = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
	maxwrk = max( maxwrk, m*m+4*m+2*m*
		     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
	maxwrk = max( maxwrk, m*m+4*m+nrhs*
		     ilaenv( 1, "dormbr", "qlt", m, nrhs, m, -1 ) );
	maxwrk = max( maxwrk, m*m+4*m+( m-1 )*
		     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
	maxwrk = max( maxwrk, m*m+6*m-4 );
	if( nrhs>1 ) {
	  maxwrk = max( maxwrk, m*m+m+m*nrhs );
	} else {
	  maxwrk = max( maxwrk, m*m+2*m );
	}
	maxwrk = max( maxwrk, m+nrhs*
		     ilaenv( 1, "dormlq", "lt", n, nrhs, m, -1 ) );
      } else {
	/**
	 *              Path 2 - underdetermined
	 **/
	maxwrk = 3*m + ( n+m )*ilaenv( 1, "dgebrd", " ", m, n,
				      -1, -1 );
	maxwrk = max( maxwrk, 3*m+nrhs*
		     ilaenv( 1, "dormbr", "qlt", m, nrhs, m, -1 ) );
	maxwrk = max( maxwrk, 3*m+m*
		     ilaenv( 1, "dorgbr", "p", m, n, m, -1 ) );
	maxwrk = max( maxwrk, 5*m-4 );
	maxwrk = max( maxwrk, n*nrhs );
      }
    }
    minwrk = min( minwrk, maxwrk );
    work_1( 1 ) = maxwrk;
  }

  minwrk = max( minwrk, 1 );
  if( lwork<minwrk )
    *info = -12;
  if( *info!=0 ) {
    xerbla( "dgelss", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( m==0 || n==0 ) {
    *rank = 0;
    return;
  }
  /**
   *     Get machine parameters
   **/
  eps = dlamch( 'p' );
  sfmin = dlamch( 's' );
  smlnum = sfmin / eps;
  bignum = one / smlnum;
  dlabad( &smlnum, &bignum );
  /**
   *     Scale A if max entry outside range [SMLNUM,BIGNUM]
   **/
  anrm = dlange( 'm', m, n, a, lda, work );
  iascl = 0;
  if( anrm>zero && anrm<smlnum ) {
    /**
     *        Scale matrix norm up to SMLNUM
     **/
    dlascl( 'g', 0, 0, anrm, smlnum, m, n, a, lda, info );
    iascl = 1;
  } else if( anrm>bignum ) {
    /**
     *        Scale matrix norm down to BIGNUM
     **/
    dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, info );
    iascl = 2;
  } else if( anrm==zero ) {
    /**
     *        Matrix all zero. Return zero solution.
     **/
    dlaset( 'f', max( m, n ), nrhs, zero, zero, b, ldb );
    dlaset( 'f', minmn, 1, zero, zero, s, 1 );
    *rank = 0;
    goto L_70;
  }
  /**
   *     Scale B if max entry outside range [SMLNUM,BIGNUM]
   **/
  bnrm = dlange( 'm', m, nrhs, b, ldb, work );
  ibscl = 0;
  if( bnrm>zero && bnrm<smlnum ) {
    /**
     *        Scale matrix norm up to SMLNUM
     **/
    dlascl( 'g', 0, 0, bnrm, smlnum, m, nrhs, b, ldb, info );
    ibscl = 1;
  } else if( bnrm>bignum ) {
    /**
     *        Scale matrix norm down to BIGNUM
     **/
    dlascl( 'g', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info );
    ibscl = 2;
  }
  /**
   *     Overdetermined case
   **/
  if( m>=n ) {
    /**
     *        Path 1 - overdetermined or exactly determined
     **/
    mm = m;
    if( m>=mnthr ) {
      /**
       *           Path 1a - overdetermined, with many more rows than columns
       **/
      mm = n;
      itau = 1;
      iwork = itau + n;
      /**
       *           Compute A=Q*R
       *           (Workspace: need 2*N, prefer N+N*NB)
       **/
      dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ),
	     lwork-iwork+1, info );
      /**
       *           Multiply B by transpose(Q)
       *           (Workspace: need N+NRHS, prefer N+NRHS*NB)
       **/
      dormqr( 'l', 't', m, nrhs, n, a, lda, &work_1( itau ), b,
	     ldb, &work_1( iwork ), lwork-iwork+1, info );
      /**
       *           Zero out below R
       **/
      if( n>1 )
	dlaset( 'l', n-1, n-1, zero, zero, &a_2( 2, 1 ), lda );
    }

    ie = 1;
    itauq = ie + n;
    itaup = itauq + n;
    iwork = itaup + n;
    /**
     *        Bidiagonalize R in A
     *        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
     **/
    dgebrd( mm, n, a, lda, s, &work_1( ie ), &work_1( itauq ),
	   &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
	   info );
    /**
     *        Multiply B by transpose of left bidiagonalizing vectors of R
     *        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
     **/
    dormbr( 'q', 'l', 't', mm, nrhs, n, a, lda, &work_1( itauq ),
	   b, ldb, &work_1( iwork ), lwork-iwork+1, info );
    /**
     *        Generate right bidiagonalizing vectors of R in A
     *        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
     **/
    dorgbr( 'p', n, n, n, a, lda, &work_1( itaup ),
	   &work_1( iwork ), lwork-iwork+1, info );
    iwork = ie + n;
    /**
     *        Perform bidiagonal QR iteration
     *          multiply B by transpose of left singular vectors
     *          compute right singular vectors in A
     *        (Workspace: need 5*N-4)
     **/
    dbdsqr( 'u', n, n, 0, nrhs, s, &work_1( ie ), a, lda, vdum,
	   1, b, ldb, &work_1( iwork ), info );
    if( *info!=0 )
      goto L_70;
    /**
     *        Multiply B by reciprocals of singular values
     **/
    thr = max( rcond*s_1( 1 ), sfmin );
    if( thr<zero )
      thr = max( eps*s_1( 1 ), sfmin );
    *rank = 0;
    for (i=1 ; i<=n ; i+=1) {
      if( s_1( i )>thr ) {
	drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
	*rank = *rank + 1;
      } else {
	dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
      }
    }
    /**
     *        Multiply B by right singular vectors
     *        (Workspace: need N, prefer N*NRHS)
     **/
    if( lwork>=ldb*nrhs && nrhs>1 ) {
      dgemm( 't', 'n', n, nrhs, n, one, a, lda, b, ldb, zero,
	    work, ldb );
      dlacpy( 'g', n, nrhs, work, ldb, b, ldb );
    } else if( nrhs>1 ) {
      chunk = lwork / n;
      for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
	bl = min( nrhs-i+1, chunk );
	dgemm( 't', 'n', n, bl, n, one, a, lda, b, ldb,
	      zero, work, n );
	dlacpy( 'g', n, bl, work, n, b, ldb );
      }
    } else {
      dgemv( 't', n, n, one, a, lda, b, 1, zero, work, 1 );
      dcopy( n, work, 1, b, 1 );
    }

  } else if( n>=mnthr && lwork>=4*m+m*m+
	    max( max(m, 2*m-4), max(nrhs, n-3*m) ) ) {
    /**
     *        Path 2a - underdetermined, with many more columns than rows
     *        and sufficient workspace for an efficient algorithm
     **/
    ldwork = m;
    if( lwork>=max( 4*m+m*lda+max( max(m, 2*m-4), max(nrhs, n-3*m) ),
		   m*lda+m+m*nrhs ) )ldwork = lda;
    itau = 1;
    iwork = m + 1;
    /**
     *        Compute A=L*Q
     *        (Workspace: need 2*M, prefer M+M*NB)
     **/
    dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ),
	   lwork-iwork+1, info );
    il = iwork;
    /**
     *        Copy L to WORK(IL), zeroing out above it
     **/
    dlacpy( 'l', m, m, a, lda, &work_1( il ), ldwork );
    dlaset( 'u', m-1, m-1, zero, zero, &work_1( il+ldwork ),
	   ldwork );
    ie = il + ldwork*m;
    itauq = ie + m;
    itaup = itauq + m;
    iwork = itaup + m;
    /**
     *        Bidiagonalize L in WORK(IL)
     *        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
     **/
    dgebrd( m, m, &work_1( il ), ldwork, s, &work_1( ie ),
	   &work_1( itauq ), &work_1( itaup ), &work_1( iwork ),
	   lwork-iwork+1, info );
    /**
     *        Multiply B by transpose of left bidiagonalizing vectors of L
     *        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
     **/
    dormbr( 'q', 'l', 't', m, nrhs, m, &work_1( il ), ldwork,
	   &work_1( itauq ), b, ldb, &work_1( iwork ),
	   lwork-iwork+1, info );
    /**
     *        Generate right bidiagonalizing vectors of R in WORK(IL)
     *        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
     **/
    dorgbr( 'p', m, m, m, &work_1( il ), ldwork, &work_1( itaup ),
	   &work_1( iwork ), lwork-iwork+1, info );
    iwork = ie + m;
    /**
     *        Perform bidiagonal QR iteration,
     *           computing right singular vectors of L in WORK(IL) and
     *           multiplying B by transpose of left singular vectors
     *        (Workspace: need M*M+6*M-4)
     **/
    dbdsqr( 'u', m, m, 0, nrhs, s, &work_1( ie ), &work_1( il ),
	   ldwork, a, lda, b, ldb, &work_1( iwork ), info );
    if( *info!=0 )
      goto L_70;
    /**
     *        Multiply B by reciprocals of singular values
     **/
    thr = max( rcond*s_1( 1 ), sfmin );
    if( thr<zero )
      thr = max( eps*s_1( 1 ), sfmin );
    *rank = 0;
    for (i=1 ; i<=m ; i+=1) {
      if( s_1( i )>thr ) {
	drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
	*rank = *rank + 1;
      } else {
	dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
      }
    }
    iwork = ie;
    /**
     *        Multiply B by right singular vectors of L in WORK(IL)
     *        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
     **/
    if( lwork>=ldb*nrhs+iwork-1 && nrhs>1 ) {
      dgemm( 't', 'n', m, nrhs, m, one, &work_1( il ), ldwork,
	    b, ldb, zero, &work_1( iwork ), ldb );
      dlacpy( 'g', m, nrhs, &work_1( iwork ), ldb, b, ldb );
    } else if( nrhs>1 ) {
      chunk = ( lwork-iwork+1 ) / m;
      for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
	bl = min( nrhs-i+1, chunk );
	dgemm( 't', 'n', m, bl, m, one, &work_1( il ), ldwork,
	      &b_2( 1, i ), ldb, zero, &work_1( iwork ), n );
	dlacpy( 'g', m, bl, &work_1( iwork ), n, b, ldb );
      }
    } else {
      dgemv( 't', m, m, one, &work_1( il ), ldwork, &b_2( 1, 1 ),
	    1, zero, &work_1( iwork ), 1 );
      dcopy( m, &work_1( iwork ), 1, &b_2( 1, 1 ), 1 );
    }
    /**
     *        Zero out below first M rows of B
     **/
    dlaset( 'f', n-m, nrhs, zero, zero, &b_2( m+1, 1 ), ldb );
    iwork = itau + m;
    /**
     *        Multiply transpose(Q) by B
     *        (Workspace: need M+NRHS, prefer M+NRHS*NB)
     **/
    dormlq( 'l', 't', n, nrhs, m, a, lda, &work_1( itau ), b,
	   ldb, &work_1( iwork ), lwork-iwork+1, info );

  } else {
    /**
     *        Path 2 - remaining underdetermined cases
     **/
    ie = 1;
    itauq = ie + m;
    itaup = itauq + m;
    iwork = itaup + m;
    /**
     *        Bidiagonalize A
     *        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
     **/
    dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ),
	   &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
	   info );
    /**
     *        Multiply B by transpose of left bidiagonalizing vectors
     *        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
     **/
    dormbr( 'q', 'l', 't', m, nrhs, n, a, lda, &work_1( itauq ),
	   b, ldb, &work_1( iwork ), lwork-iwork+1, info );
    /**
     *        Generate right bidiagonalizing vectors in A
     *        (Workspace: need 4*M, prefer 3*M+M*NB)
     **/
    dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ),
	   &work_1( iwork ), lwork-iwork+1, info );
    iwork = ie + m;
    /**
     *        Perform bidiagonal QR iteration,
     *           computing right singular vectors of A in A and
     *           multiplying B by transpose of left singular vectors
     *        (Workspace: need 5*M-4)
     **/
    dbdsqr( 'l', m, n, 0, nrhs, s, &work_1( ie ), a, lda, vdum,
	   1, b, ldb, &work_1( iwork ), info );
    if( *info!=0 )
      goto L_70;
    /**
     *        Multiply B by reciprocals of singular values
     **/
    thr = max( rcond*s_1( 1 ), sfmin );
    if( thr<zero )
      thr = max( eps*s_1( 1 ), sfmin );
    *rank = 0;
    for (i=1 ; i<=m ; i+=1) {
      if( s_1( i )>thr ) {
	drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
	*rank = *rank + 1;
      } else {
	dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
      }
    }
    /**
     *        Multiply B by right singular vectors of A
     *        (Workspace: need N, prefer N*NRHS)
     **/
    if( lwork>=ldb*nrhs && nrhs>1 ) {
      dgemm( 't', 'n', n, nrhs, m, one, a, lda, b, ldb, zero,
	    work, ldb );
      dlacpy( 'f', n, nrhs, work, ldb, b, ldb );
    } else if( nrhs>1 ) {
      chunk = lwork / n;
      for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
	bl = min( nrhs-i+1, chunk );
	dgemm( 't', 'n', n, bl, m, one, a, lda, &b_2( 1, i ),
	      ldb, zero, work, n );
	dlacpy( 'f', n, bl, work, n, &b_2( 1, i ), ldb );
      }
    } else {
      dgemv( 't', m, n, one, a, lda, b, 1, zero, work, 1 );
      dcopy( n, work, 1, b, 1 );
    }
  }
  /**
   *     Undo scaling
   **/
  if( iascl==1 ) {
    dlascl( 'g', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info );
    dlascl( 'g', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
	   info );
  } else if( iascl==2 ) {
    dlascl( 'g', 0, 0, anrm, bignum, n, nrhs, b, ldb, info );
    dlascl( 'g', 0, 0, bignum, anrm, minmn, 1, s, minmn,
	   info );
  }
  if( ibscl==1 ) {
    dlascl( 'g', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info );
  } else if( ibscl==2 ) {
    dlascl( 'g', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info );
  }

 L_70:
  work_1( 1 ) = maxwrk;
  return;
  /**
   *     End of DGELSS
   **/
}



void dlacpy( char uplo, long m, long n, double a[], long lda,
	    double b[], long ldb )
{
  /**
   *  -- LAPACK auxiliary routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     February 29, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef b_2
#define b_2(a1,a2) b[a1-1+ldb*(a2-1)]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DLACPY copies all or part of a two-dimensional matrix A to another
   *  matrix B.
   *
   *  Arguments
   *  =========
   *
   *  UPLO    (input) CHARACTER*1
   *          Specifies the part of the matrix A to be copied to B.
   *          = 'U':      Upper triangular part
   *          = 'L':      Lower triangular part
   *          Otherwise:  All of the matrix A
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix A.  M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix A.  N >= 0.
   *
   *  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
   *          The m by n matrix A.  If UPLO = 'U', only the upper triangle
   *          or trapezoid is accessed; if UPLO = 'L', only the lower
   *          triangle or trapezoid is accessed.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.  LDA >= max(1,M).
   *
   *  B       (output) DOUBLE PRECISION array, dimension (LDB,N)
   *          On exit, B = A in the locations specified by UPLO.
   *
   *  LDB     (input) INTEGER
   *          The leading dimension of the array B.  LDB >= max(1,M).
   *
   *  =====================================================================
   *
   *     .. Local Scalars ..*/
  long            i, j;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          min;*/
  /**     ..
   *     .. Executable Statements ..
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  if( lsame( uplo, 'u' ) ) {
    for (j=1 ; j<=n ; j+=1) {
      for (i=1 ; i<=min( j, m ) ; i+=1) {
	b_2( i, j ) = a_2( i, j );
      }
    }
  } else if( lsame( uplo, 'l' ) ) {
    for (j=1 ; j<=n ; j+=1) {
      for (i=j ; i<=m ; i+=1) {
	b_2( i, j ) = a_2( i, j );
      }
    }
  } else {
    for (j=1 ; j<=n ; j+=1) {
      for (i=1 ; i<=m ; i+=1) {
	b_2( i, j ) = a_2( i, j );
      }
    }
  }
  return;
  /**
   *     End of DLACPY
   **/
}



void dbdsqr( char uplo, long n, long ncvt, long nru, long ncc,
	    double d[], double e[], double vt[], long ldvt,
	    double u[],long ldu, double c[], long ldc,
	    double work[], long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     March 31, 1993
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef vt_2
#define vt_2(a1,a2) vt[a1-1+ldvt*(a2-1)]
#undef u_2
#define u_2(a1,a2) u[a1-1+ldu*(a2-1)]
#undef e_1
#define e_1(a1) e[a1-1]
#undef d_1
#define d_1(a1) d[a1-1]
#undef c_2
#define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DBDSQR computes the singular value decomposition (SVD) of a real
   *  N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'
   *  denotes the transpose of P), where S is a diagonal matrix with
   *  non-negative diagonal elements (the singular values of B), and Q
   *  and P are orthogonal matrices.
   *
   *  The routine computes S, and optionally computes U * Q, P' * VT,
   *  or Q' * C, for given real input matrices U, VT, and C.
   *
   *  See "Computing  Small Singular Values of Bidiagonal Matrices With
   *  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
   *  LAPACK Working Note #3, for a detailed description of the algorithm.
   *
   *  Arguments
   *  =========
   *
   *  UPLO    (input) CHARACTER*1
   *          = 'U':  B is upper bidiagonal;
   *          = 'L':  B is lower bidiagonal.
   *
   *  N       (input) INTEGER
   *          The order of the matrix B.  N >= 0.
   *
   *  NCVT    (input) INTEGER
   *          The number of columns of the matrix VT. NCVT >= 0.
   *
   *  NRU     (input) INTEGER
   *          The number of rows of the matrix U. NRU >= 0.
   *
   *  NCC     (input) INTEGER
   *          The number of columns of the matrix C. NCC >= 0.
   *
   *  D       (input/output) DOUBLE PRECISION array, dimension (N)
   *          On entry, the n diagonal elements of the bidiagonal matrix B.
   *          On exit, if INFO=0, the singular values of B in decreasing
   *          order.
   *
   *  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
   *          On entry, the (n-1) off-diagonal elements of the bidiagonal
   *          matrix B.
   *          On normal exit, E is destroyed.
   *
   *  VT      (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT)
   *          On entry, an N-by-NCVT matrix VT.
   *          On exit, VT is overwritten by P' * VT.
   *          VT is not referenced if NCVT = 0.
   *
   *  LDVT    (input) INTEGER
   *          The leading dimension of the array VT.
   *          LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
   *
   *  U       (input/output) DOUBLE PRECISION array, dimension (LDU, N)
   *          On entry, an NRU-by-N matrix U.
   *          On exit, U is overwritten by U * Q.
   *          U is not referenced if NRU = 0.
   *
   *  LDU     (input) INTEGER
   *          The leading dimension of the array U.  LDU >= max(1,NRU).
   *
   *  C       (input/output) DOUBLE PRECISION array, dimension (LDC, NCC)
   *          On entry, an N-by-NCC matrix C.
   *          On exit, C is overwritten by Q' * C.
   *          C is not referenced if NCC = 0.
   *
   *  LDC     (input) INTEGER
   *          The leading dimension of the array C.
   *          LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension
   *                      (MAX( 1, 4*N-4 ))
   *          WORK is not referenced if NCVT = NRU = NCC = 0.
   *
   *  INFO    (output) INTEGER
   *          = 0:  successful exit
   *          < 0:  If INFO = -i, the i-th argument had an illegal value
   *          > 0:  the algorithm did not converge; D and E contain the
   *                elements of a bidiagonal matrix which is orthogonally
   *                similar to the input matrix B;  if INFO = i, i
   *                elements of E have not converged to zero.
   *
   *  Internal Parameters
   *  ===================
   *
   *  TOLMUL  DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
   *          TOLMUL controls the convergence criterion of the QR loop.
   *          If it is positive, TOLMUL*EPS is the desired relative
   *             precision in the computed singular values.
   *          If it is negative, abs(TOLMUL*EPS*sigma_max) is the
   *             desired absolute accuracy in the computed singular
   *             values (corresponds to relative accuracy
   *             abs(TOLMUL*EPS) in the largest singular value.
   *          abs(TOLMUL) should be between 1 and 1/EPS, and preferably
   *             between 10 (for fast convergence) and .1/EPS
   *             (for there to be some accuracy in the results).
   *          Default is to lose at either one eighth or 2 of the
   *             available decimal digits in each computed singular value
   *             (whichever is smaller).
   *
   *  MAXITR  INTEGER, default = 6
   *          MAXITR controls the maximum number of passes of the
   *          algorithm through its inner loop. The algorithms stops
   *          (and so fails to converge) if the number of passes
   *          through the inner loop exceeds MAXITR*N**2.
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e0
#undef one
#define one 1.0e0
#undef negone
#define negone -1.0e0
#undef hndrth
#define hndrth 0.01e0
#undef ten
#define ten 10.0e0
#undef hndrd
#define hndrd 100.0e0
#undef meigth
#define meigth -0.125e0
#undef maxitr
#define maxitr 6
  /**     ..
   *     .. Local Scalars ..*/
  int            rotate;
  long     i, idir=0, irot, isub, iter, iuplo, j, job, ll=0,
           lll, m, maxit, nm1, nm12, nm13, oldll, oldm;
  double    abse, abss, cosl, cosr, cs, eps, f, g, gap,
            gmax, h, mu, oldcs, oldsn, r, shift, sigmn,
            sigmx, sinl, sinr, sll, smax, smin, sminl,
            sminlo=0.0, sminoa=0.0, sn, thresh, tol, tolmul, unfl;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          abs, dble, max, min, sign, sqrt;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input parameters.
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  iuplo = 0;
  if( lsame( uplo, 'u' ) )
    iuplo = 1;
  if( lsame( uplo, 'l' ) )
    iuplo = 2;
  if( iuplo==0 ) {
    *info = -1;
  } else if( n<0 ) {
    *info = -2;
  } else if( ncvt<0 ) {
    *info = -3;
  } else if( nru<0 ) {
    *info = -4;
  } else if( ncc<0 ) {
    *info = -5;
  } else if( ( ncvt==0 && ldvt<1 ) ||
	    ( ncvt>0 && ldvt<max( 1, n ) ) ) {
    *info = -9;
  } else if( ldu<max( 1, nru ) ) {
    *info = -11;
  } else if( ( ncc==0 && ldc<1 ) ||
	    ( ncc>0 && ldc<max( 1, n ) ) ) {
    *info = -13;
  }
  if( *info!=0 ) {
    xerbla( "dbdsqr", -*info );
    return;
  }
  if( n==0 )
    return;
  if( n==1 )
    goto L_190;
  /**
   *     ROTATE is true if any singular vectors desired, false otherwise
   **/
  rotate = ( ncvt>0 ) || ( nru>0 ) || ( ncc>0 );
  nm1 = n - 1;
  nm12 = nm1 + nm1;
  nm13 = nm12 + nm1;
  /**
   *     Get machine constants
   **/
  eps = dlamch( 'e'/*psilon*/ );
  unfl = dlamch( 's'/*afe minimum*/ );
  tolmul = max( ten, min( hndrd, pow(eps,meigth) ) );
  tol = tolmul*eps;
  /**
   *     If matrix lower bidiagonal, rotate to be upper bidiagonal
   *     by applying Givens rotations on the left
   **/
  if( iuplo==2 ) {
    for (i=1 ; i<=n - 1 ; i+=1) {
      dlartg( d_1( i ), e_1( i ), &cs, &sn, &r );
      d_1( i ) = r;
      e_1( i ) = sn*d_1( i+1 );
      d_1( i+1 ) = cs*d_1( i+1 );
      if( rotate ) {
	work_1( i ) = cs;
	work_1( nm1+i ) = sn;
      }
    }
    /**
     *        Update singular vectors if desired
     **/
    if( nru>0 )
      dlasr( 'r', 'v', 'f', nru, n, &work_1( 1 ), &work_1( n ), u,
	    ldu );
    if( ncc>0 )
      dlasr( 'l', 'v', 'f', n, ncc, &work_1( 1 ), &work_1( n ), c,
	    ldc );
  }
  /**
   *     Compute approximate maximum, minimum singular values
   **/
  smax = abs( d_1( n ) );
  for (i=1 ; i<=n - 1 ; i+=1) {
    smax = max( smax, max(abs( d_1( i ) ), abs( e_1( i ) )) );
  }
  sminl = zero;
  if( tol>=zero ) {
    sminoa = abs( d_1( 1 ) );
    if( sminoa==zero )
      goto L_40;
    mu = sminoa;
    for (i=2 ; i<=n ; i+=1) {
      mu = abs( d_1( i ) )*( mu / ( mu+abs( e_1( i-1 ) ) ) );
      sminoa = min( sminoa, mu );
      if( sminoa==zero )
	goto L_40;
    }
  L_40:
    sminoa = sminoa / sqrt( dble( n ) );
  }
  /**
   *     Prepare for main iteration loop for the singular values
   **/
  maxit = maxitr*n*n;
  iter = 0;
  oldll = -1;
  oldm = -1;
  if( ncc==0 && nru==0 && ncvt==0 ) {
    /**
     *        No singular vectors desired
     **/
    job = 0;
  } else {
    /**
     *        Singular vectors desired
     **/
    job = 1;
  }
  if( tol>=zero ) {
    /**
     *        Relative accuracy desired
     **/
    thresh = max( tol*sminoa, maxit*unfl );
  } else {
    /**
     *        Absolute accuracy desired
     **/
    thresh = max( abs( tol )*smax, maxit*unfl );
  }
  /**
   *     M points to last entry of unconverged part of matrix
   **/
  m = n;
  /**
   *     Begin main iteration loop
   **/
 L_50:
  /**
   *     Check for convergence or exceeding iteration count
   **/
  if( m<=1 )
    goto L_190;
  if( iter>maxit )
    goto L_230;
  /**
   *     Find diagonal block of matrix to work on
   **/
  if( tol<zero && abs( d_1( m ) )<=thresh )
    d_1( m ) = zero;
  smax = abs( d_1( m ) );
  smin = smax;
  for (lll=1 ; lll<=m ; lll+=1) {
    ll = m - lll;
    if( ll==0 )
      goto L_80;
    abss = abs( d_1( ll ) );
    abse = abs( e_1( ll ) );
    if( tol<zero && abss<=thresh )
      d_1( ll ) = zero;
    if( abse<=thresh )
      goto L_70;
    smin = min( smin, abss );
    smax = max( smax, max(abss, abse) );
  }
 L_70:
  e_1( ll ) = zero;
  /**
   *     Matrix splits since E(LL) = 0
   **/
  if( ll==m-1 ) {
    /**
     *        Convergence of bottom singular value, return to top of loop
     **/
    m = m - 1;
    goto L_50;
  }
 L_80:
  ll = ll + 1;
  /**
   *     E(LL) through E(M-1) are nonzero, E(LL-1) is zero
   **/
  if( ll==m-1 ) {
    /**
     *        2 by 2 block, handle separately
     **/
    dlasv2( d_1( m-1 ), e_1( m-1 ), d_1( m ), &sigmn, &sigmx, &sinr,
	   &cosr, &sinl, &cosl );
    d_1( m-1 ) = sigmx;
    e_1( m-1 ) = zero;
    d_1( m ) = sigmn;
    /**
     *        Compute singular vectors, if desired
     **/
    if( ncvt>0 )
      drot( ncvt, &vt_2( m-1, 1 ), ldvt, &vt_2( m, 1 ), ldvt, cosr,
	   sinr );
    if( nru>0 )
      drot( nru, &u_2( 1, m-1 ), 1, &u_2( 1, m ), 1, cosl, sinl );
    if( ncc>0 )
      drot( ncc, &c_2( m-1, 1 ), ldc, &c_2( m, 1 ), ldc, cosl,
	   sinl );
    m = m - 2;
    goto L_50;
  }
  /**
   *     If working on new submatrix, choose shift direction
   *     (from larger end diagonal entry towards smaller)
   **/
  if( ll>oldm || m<oldll ) {
    if( abs( d_1( ll ) )>=abs( d_1( m ) ) ) {
      /**
       *           Chase bulge from top (big end) to bottom (small end)
       **/
      idir = 1;
    } else {
      /**
       *           Chase bulge from bottom (big end) to top (small end)
       **/
      idir = 2;
    }
  }
  /**
   *     Apply convergence tests
   **/
  if( idir==1 ) {
    /**
     *        Run convergence test in forward direction
     *        First apply standard test to bottom of matrix
     **/
    if( abs( e_1( m-1 ) )<=abs( tol )*abs( d_1( m ) ) ||
       ( tol<zero && abs( e_1( m-1 ) )<=thresh ) ) {
      e_1( m-1 ) = zero;
      goto L_50;
    }

    if( tol>=zero ) {
      /**
       *           If relative accuracy desired,
       *           apply convergence criterion forward
       **/
      mu = abs( d_1( ll ) );
      sminl = mu;
      for (lll=ll ; lll<=m - 1 ; lll+=1) {
	if( abs( e_1( lll ) )<=tol*mu ) {
	  e_1( lll ) = zero;
	  goto L_50;
	}
	sminlo = sminl;
	mu = abs( d_1( lll+1 ) )*( mu / ( mu+abs( e_1( lll ) ) ) );
	sminl = min( sminl, mu );
      }
      /**
       *           If singular values only wanted, apply gap test to bottom
       *           end of matrix
       **/
      if( job==0 ) {
	gap = sminlo / sqrt( dble( m-ll ) ) - abs( d_1( m ) );
	if( gap>zero ) {
	  abss = abs( d_1( m ) );
	  abse = abs( e_1( m-1 ) );
	  gmax = max( gap, max(abss, abse) );
	  if( ( abse / gmax )*( abse / gmax )<=tol*( gap / gmax )*
	     ( abss / gmax ) ) {
	    e_1( m-1 ) = zero;
	    goto L_50;
	  }
	}
      }
    }
  } else {
    /**
     *        Run convergence test in backward direction
     *        First apply standard test to top of matrix
     **/
    if( abs( e_1( ll ) )<=abs( tol )*abs( d_1( ll ) ) ||
       ( tol<zero && abs( e_1( ll ) )<=thresh ) ) {
      e_1( ll ) = zero;
      goto L_50;
    }

    if( tol>=zero ) {
      /**
       *           If relative accuracy desired,
       *           apply convergence criterion backward
       **/
      mu = abs( d_1( m ) );
      sminl = mu;
      for (lll=m - 1 ; lll>=ll ; lll+=-1) {
	if( abs( e_1( lll ) )<=tol*mu ) {
	  e_1( lll ) = zero;
	  goto L_50;
	}
	sminlo = sminl;
	mu = abs( d_1( lll ) )*( mu / ( mu+abs( e_1( lll ) ) ) );
	sminl = min( sminl, mu );
      }
      /**
       *           If singular values only wanted, apply gap test to top
       *           end of matrix
       **/
      if( job==0 ) {
	gap = sminlo / sqrt( dble( m-ll ) ) - abs( d_1( ll ) );
	if( gap>zero ) {
	  abss = abs( d_1( ll ) );
	  abse = abs( e_1( ll ) );
	  gmax = max( gap, max(abss, abse) );
	  if( ( abse / gmax )*( abse / gmax )<=tol*( gap / gmax )*
	     ( abss / gmax ) ) {
	    e_1( ll ) = zero;
	    goto L_50;
	  }
	}
      }
    }
  }
  oldll = ll;
  oldm = m;
  /**
   *     Compute shift.  First, test if shifting would ruin relative
   *     accuracy, and if so set the shift to zero.
   **/
  if( tol>=zero && n*tol*( sminl / smax )<=
     max( eps, hndrth*tol ) ) {
    /**
     *        Use a zero shift to avoid loss of relative accuracy
     **/
    shift = zero;
  } else {
    /**
     *        Compute the shift from 2-by-2 block at end of matrix
     **/
    if( idir==1 ) {
      sll = abs( d_1( ll ) );
      dlas2( d_1( m-1 ), e_1( m-1 ), d_1( m ), &shift, &r );
    } else {
      sll = abs( d_1( m ) );
      dlas2( d_1( ll ), e_1( ll ), d_1( ll+1 ), &shift, &r );
    }
    /**
     *        Test if shift negligible, and if so set to zero
     **/
    if( sll>zero ) {
      if( ( shift / sll )*( shift / sll )<eps )
	shift = zero;
    }
  }
  /**
   *     Increment iteration count
   **/
  iter = iter + m - ll;
  /**
   *     If SHIFT = 0, do simplified QR iteration
   **/
  if( shift==zero ) {
    if( idir==1 ) {
      /**
       *           Chase bulge from top to bottom
       **/
      cs = one;
      oldcs = one;
      /**
       *           Save cosines and sines if singular vectors desired
       **/
      if( rotate ) {

	dlartg( d_1( ll )*cs, e_1( ll ), &cs, &sn, &r );
	dlartg( oldcs*r, d_1( ll+1 )*sn, &oldcs, &oldsn,
	       &d_1( ll ) );
	work_1( 1 ) = cs;
	work_1( 1+nm1 ) = sn;
	work_1( 1+nm12 ) = oldcs;
	work_1( 1+nm13 ) = oldsn;
	irot = 1;
	for (i=ll + 1 ; i<=m - 1 ; i+=1) {
	  dlartg( d_1( i )*cs, e_1( i ), &cs, &sn, &r );
	  e_1( i-1 ) = oldsn*r;
	  dlartg( oldcs*r, d_1( i+1 )*sn, &oldcs, &oldsn,
		 &d_1( i ) );
	  irot = irot + 1;
	  work_1( irot ) = cs;
	  work_1( irot+nm1 ) = sn;
	  work_1( irot+nm12 ) = oldcs;
	  work_1( irot+nm13 ) = oldsn;
	}
	h = d_1( m )*cs;
	d_1( m ) = h*oldcs;
	e_1( m-1 ) = h*oldsn;
	/**
	 *              Update singular vectors
	 **/
	if( ncvt>0 )
	  dlasr( 'l', 'v', 'f', m-ll+1, ncvt, &work_1( 1 ),
		&work_1( n ), &vt_2( ll, 1 ), ldvt );
	if( nru>0 )
	  dlasr( 'r', 'v', 'f', nru, m-ll+1,
		&work_1( nm12+1 ), &work_1( nm13+1 ),
		&u_2( 1, ll ), ldu );
	if( ncc>0 )
	  dlasr( 'l', 'v', 'f', m-ll+1, ncc,
		&work_1( nm12+1 ), &work_1( nm13+1 ),
		&c_2( ll, 1 ), ldc );

      } else {

	dlartg( d_1( ll )*cs, e_1( ll ), &cs, &sn, &r );
	dlartg( oldcs*r, d_1( ll+1 )*sn, &oldcs, &oldsn,
	       &d_1( ll ) );
	for (i=ll + 1 ; i<=m - 1 ; i+=1) {
	  dlartg( d_1( i )*cs, e_1( i ), &cs, &sn, &r );
	  e_1( i-1 ) = oldsn*r;
	  dlartg( oldcs*r, d_1( i+1 )*sn, &oldcs, &oldsn,
		 &d_1( i ) );
	}
	h = d_1( m )*cs;
	d_1( m ) = h*oldcs;
	e_1( m-1 ) = h*oldsn;

      }
      /**
       *           Test convergence
       **/
      if( abs( e_1( m-1 ) )<=thresh )
	e_1( m-1 ) = zero;

    } else {
      /**
       *           Chase bulge from bottom to top
       **/
      cs = one;
      oldcs = one;
      /**
       *           Save cosines and sines if singular vectors desired
       **/
      if( rotate ) {

	dlartg( d_1( m )*cs, e_1( m-1 ), &cs, &sn, &r );
	dlartg( oldcs*r, d_1( m-1 )*sn, &oldcs, &oldsn, &d_1( m ) );
	work_1( m-ll ) = cs;
	work_1( m-ll+nm1 ) = -sn;
	work_1( m-ll+nm12 ) = oldcs;
	work_1( m-ll+nm13 ) = -oldsn;
	irot = m - ll;
	for (i=m - 1 ; i>=ll + 1 ; i+=-1) {
	  dlartg( d_1( i )*cs, e_1( i-1 ), &cs, &sn, &r );
	  e_1( i ) = oldsn*r;
	  dlartg( oldcs*r, d_1( i-1 )*sn, &oldcs, &oldsn,
		 &d_1( i ) );
	  irot = irot - 1;
	  work_1( irot ) = cs;
	  work_1( irot+nm1 ) = -sn;
	  work_1( irot+nm12 ) = oldcs;
	  work_1( irot+nm13 ) = -oldsn;
	}
	h = d_1( ll )*cs;
	d_1( ll ) = h*oldcs;
	e_1( ll ) = h*oldsn;
	/**
	 *              Update singular vectors
	 **/
	if( ncvt>0 )
	  dlasr( 'l', 'v', 'b', m-ll+1, ncvt,
		&work_1( nm12+1 ), &work_1( nm13+1 ),
		&vt_2( ll, 1 ), ldvt );
	if( nru>0 )
	  dlasr( 'r', 'v', 'b', nru, m-ll+1, &work_1( 1 ),
		&work_1( n ), &u_2( 1, ll ), ldu );
	if( ncc>0 )
	  dlasr( 'l', 'v', 'b', m-ll+1, ncc, &work_1( 1 ),
		&work_1( n ), &c_2( ll, 1 ), ldc );

      } else {

	dlartg( d_1( m )*cs, e_1( m-1 ), &cs, &sn, &r );
	dlartg( oldcs*r, d_1( m-1 )*sn, &oldcs, &oldsn, &d_1( m ) );
	for (i=m - 1 ; i>=ll + 1 ; i+=-1) {
	  dlartg( d_1( i )*cs, e_1( i-1 ), &cs, &sn, &r );
	  e_1( i ) = oldsn*r;
	  dlartg( oldcs*r, d_1( i-1 )*sn, &oldcs, &oldsn,
		 &d_1( i ) );
	}
	h = d_1( ll )*cs;
	d_1( ll ) = h*oldcs;
	e_1( ll ) = h*oldsn;

      }
      /**
       *           Test convergence
       **/
      if( abs( e_1( ll ) )<=thresh )
	e_1( ll ) = zero;
    }
  } else {
    /**
     *        Use nonzero shift
     **/
    if( idir==1 ) {
      /**
       *           Chase bulge from top to bottom
       **/
      f = ( abs( d_1( ll ) )-shift )*
	( sign( one, d_1( ll ) )+shift / d_1( ll ) );
      g = e_1( ll );
      /**
       *           Save cosines and sines if singular vectors desired
       **/
      if( rotate ) {

	dlartg( f, g, &cosr, &sinr, &r );
	f = cosr*d_1( ll ) + sinr*e_1( ll );
	e_1( ll ) = cosr*e_1( ll ) - sinr*d_1( ll );
	g = sinr*d_1( ll+1 );
	d_1( ll+1 ) = cosr*d_1( ll+1 );
	dlartg( f, g, &cosl, &sinl, &r );
	d_1( ll ) = r;
	f = cosl*e_1( ll ) + sinl*d_1( ll+1 );
	d_1( ll+1 ) = cosl*d_1( ll+1 ) - sinl*e_1( ll );
	g = sinl*e_1( ll+1 );
	e_1( ll+1 ) = cosl*e_1( ll+1 );
	work_1( 1 ) = cosr;
	work_1( 1+nm1 ) = sinr;
	work_1( 1+nm12 ) = cosl;
	work_1( 1+nm13 ) = sinl;
	irot = 1;
	for (i=ll + 1 ; i<=m - 2 ; i+=1) {
	  dlartg( f, g, &cosr, &sinr, &r );
	  e_1( i-1 ) = r;
	  f = cosr*d_1( i ) + sinr*e_1( i );
	  e_1( i ) = cosr*e_1( i ) - sinr*d_1( i );
	  g = sinr*d_1( i+1 );
	  d_1( i+1 ) = cosr*d_1( i+1 );
	  dlartg( f, g, &cosl, &sinl, &r );
	  d_1( i ) = r;
	  f = cosl*e_1( i ) + sinl*d_1( i+1 );
	  d_1( i+1 ) = cosl*d_1( i+1 ) - sinl*e_1( i );
	  g = sinl*e_1( i+1 );
	  e_1( i+1 ) = cosl*e_1( i+1 );
	  irot = irot + 1;
	  work_1( irot ) = cosr;
	  work_1( irot+nm1 ) = sinr;
	  work_1( irot+nm12 ) = cosl;
	  work_1( irot+nm13 ) = sinl;
	}
	dlartg( f, g, &cosr, &sinr, &r );
	e_1( m-2 ) = r;
	f = cosr*d_1( m-1 ) + sinr*e_1( m-1 );
	e_1( m-1 ) = cosr*e_1( m-1 ) - sinr*d_1( m-1 );
	g = sinr*d_1( m );
	d_1( m ) = cosr*d_1( m );
	dlartg( f, g, &cosl, &sinl, &r );
	d_1( m-1 ) = r;
	f = cosl*e_1( m-1 ) + sinl*d_1( m );
	d_1( m ) = cosl*d_1( m ) - sinl*e_1( m-1 );
	irot = irot + 1;
	work_1( irot ) = cosr;
	work_1( irot+nm1 ) = sinr;
	work_1( irot+nm12 ) = cosl;
	work_1( irot+nm13 ) = sinl;
	e_1( m-1 ) = f;
	/**
	 *              Update singular vectors
	 **/
	if( ncvt>0 )
	  dlasr( 'l', 'v', 'f', m-ll+1, ncvt, &work_1( 1 ),
		&work_1( n ), &vt_2( ll, 1 ), ldvt );
	if( nru>0 )
	  dlasr( 'r', 'v', 'f', nru, m-ll+1,
		&work_1( nm12+1 ), &work_1( nm13+1 ),
		&u_2( 1, ll ), ldu );
	if( ncc>0 )
	  dlasr( 'l', 'v', 'f', m-ll+1, ncc,
		&work_1( nm12+1 ), &work_1( nm13+1 ),
		&c_2( ll, 1 ), ldc );

      } else {

	dlartg( f, g, &cosr, &sinr, &r );
	f = cosr*d_1( ll ) + sinr*e_1( ll );
	e_1( ll ) = cosr*e_1( ll ) - sinr*d_1( ll );
	g = sinr*d_1( ll+1 );
	d_1( ll+1 ) = cosr*d_1( ll+1 );
	dlartg( f, g, &cosl, &sinl, &r );
	d_1( ll ) = r;
	f = cosl*e_1( ll ) + sinl*d_1( ll+1 );
	d_1( ll+1 ) = cosl*d_1( ll+1 ) - sinl*e_1( ll );
	g = sinl*e_1( ll+1 );
	e_1( ll+1 ) = cosl*e_1( ll+1 );
	for (i=ll + 1 ; i<=m - 2 ; i+=1) {
	  dlartg( f, g, &cosr, &sinr, &r );
	  e_1( i-1 ) = r;
	  f = cosr*d_1( i ) + sinr*e_1( i );
	  e_1( i ) = cosr*e_1( i ) - sinr*d_1( i );
	  g = sinr*d_1( i+1 );
	  d_1( i+1 ) = cosr*d_1( i+1 );
	  dlartg( f, g, &cosl, &sinl, &r );
	  d_1( i ) = r;
	  f = cosl*e_1( i ) + sinl*d_1( i+1 );
	  d_1( i+1 ) = cosl*d_1( i+1 ) - sinl*e_1( i );
	  g = sinl*e_1( i+1 );
	  e_1( i+1 ) = cosl*e_1( i+1 );
	}
	dlartg( f, g, &cosr, &sinr, &r );
	e_1( m-2 ) = r;
	f = cosr*d_1( m-1 ) + sinr*e_1( m-1 );
	e_1( m-1 ) = cosr*e_1( m-1 ) - sinr*d_1( m-1 );
	g = sinr*d_1( m );
	d_1( m ) = cosr*d_1( m );
	dlartg( f, g, &cosl, &sinl, &r );
	d_1( m-1 ) = r;
	f = cosl*e_1( m-1 ) + sinl*d_1( m );
	d_1( m ) = cosl*d_1( m ) - sinl*e_1( m-1 );
	e_1( m-1 ) = f;

      }
      /**
       *           Test convergence
       **/
      if( abs( e_1( m-1 ) )<=thresh )
	e_1( m-1 ) = zero;

    } else {
      /**
       *           Chase bulge from bottom to top
       **/
      f = ( abs( d_1( m ) )-shift )*( sign( one, d_1( m ) )+shift /
				     d_1( m ) );
      g = e_1( m-1 );
      /**
       *           Save cosines and sines if singular vectors desired
       **/
      if( rotate ) {

	dlartg( f, g, &cosr, &sinr, &r );
	f = cosr*d_1( m ) + sinr*e_1( m-1 );
	e_1( m-1 ) = cosr*e_1( m-1 ) - sinr*d_1( m );
	g = sinr*d_1( m-1 );
	d_1( m-1 ) = cosr*d_1( m-1 );
	dlartg( f, g, &cosl, &sinl, &r );
	d_1( m ) = r;
	f = cosl*e_1( m-1 ) + sinl*d_1( m-1 );
	d_1( m-1 ) = cosl*d_1( m-1 ) - sinl*e_1( m-1 );
	g = sinl*e_1( m-2 );
	e_1( m-2 ) = cosl*e_1( m-2 );
	work_1( m-ll ) = cosr;
	work_1( m-ll+nm1 ) = -sinr;
	work_1( m-ll+nm12 ) = cosl;
	work_1( m-ll+nm13 ) = -sinl;
	irot = m - ll;
	for (i=m - 1 ; i>=ll + 2 ; i+=-1) {
	  dlartg( f, g, &cosr, &sinr, &r );
	  e_1( i ) = r;
	  f = cosr*d_1( i ) + sinr*e_1( i-1 );
	  e_1( i-1 ) = cosr*e_1( i-1 ) - sinr*d_1( i );
	  g = sinr*d_1( i-1 );
	  d_1( i-1 ) = cosr*d_1( i-1 );
	  dlartg( f, g, &cosl, &sinl, &r );
	  d_1( i ) = r;
	  f = cosl*e_1( i-1 ) + sinl*d_1( i-1 );
	  d_1( i-1 ) = cosl*d_1( i-1 ) - sinl*e_1( i-1 );
	  g = sinl*e_1( i-2 );
	  e_1( i-2 ) = cosl*e_1( i-2 );
	  irot = irot - 1;
	  work_1( irot ) = cosr;
	  work_1( irot+nm1 ) = -sinr;
	  work_1( irot+nm12 ) = cosl;
	  work_1( irot+nm13 ) = -sinl;
	}
	dlartg( f, g, &cosr, &sinr, &r );
	e_1( ll+1 ) = r;
	f = cosr*d_1( ll+1 ) + sinr*e_1( ll );
	e_1( ll ) = cosr*e_1( ll ) - sinr*d_1( ll+1 );
	g = sinr*d_1( ll );
	d_1( ll ) = cosr*d_1( ll );
	dlartg( f, g, &cosl, &sinl, &r );
	d_1( ll+1 ) = r;
	f = cosl*e_1( ll ) + sinl*d_1( ll );
	d_1( ll ) = cosl*d_1( ll ) - sinl*e_1( ll );
	irot = irot - 1;
	work_1( irot ) = cosr;
	work_1( irot+nm1 ) = -sinr;
	work_1( irot+nm12 ) = cosl;
	work_1( irot+nm13 ) = -sinl;
	e_1( ll ) = f;

      } else {

	dlartg( f, g, &cosr, &sinr, &r );
	f = cosr*d_1( m ) + sinr*e_1( m-1 );
	e_1( m-1 ) = cosr*e_1( m-1 ) - sinr*d_1( m );
	g = sinr*d_1( m-1 );
	d_1( m-1 ) = cosr*d_1( m-1 );
	dlartg( f, g, &cosl, &sinl, &r );
	d_1( m ) = r;
	f = cosl*e_1( m-1 ) + sinl*d_1( m-1 );
	d_1( m-1 ) = cosl*d_1( m-1 ) - sinl*e_1( m-1 );
	g = sinl*e_1( m-2 );
	e_1( m-2 ) = cosl*e_1( m-2 );
	for (i=m - 1 ; i>=ll + 2 ; i+=-1) {
	  dlartg( f, g, &cosr, &sinr, &r );
	  e_1( i ) = r;
	  f = cosr*d_1( i ) + sinr*e_1( i-1 );
	  e_1( i-1 ) = cosr*e_1( i-1 ) - sinr*d_1( i );
	  g = sinr*d_1( i-1 );
	  d_1( i-1 ) = cosr*d_1( i-1 );
	  dlartg( f, g, &cosl, &sinl, &r );
	  d_1( i ) = r;
	  f = cosl*e_1( i-1 ) + sinl*d_1( i-1 );
	  d_1( i-1 ) = cosl*d_1( i-1 ) - sinl*e_1( i-1 );
	  g = sinl*e_1( i-2 );
	  e_1( i-2 ) = cosl*e_1( i-2 );
	}
	dlartg( f, g, &cosr, &sinr, &r );
	e_1( ll+1 ) = r;
	f = cosr*d_1( ll+1 ) + sinr*e_1( ll );
	e_1( ll ) = cosr*e_1( ll ) - sinr*d_1( ll+1 );
	g = sinr*d_1( ll );
	d_1( ll ) = cosr*d_1( ll );
	dlartg( f, g, &cosl, &sinl, &r );
	d_1( ll+1 ) = r;
	f = cosl*e_1( ll ) + sinl*d_1( ll );
	d_1( ll ) = cosl*d_1( ll ) - sinl*e_1( ll );
	e_1( ll ) = f;

      }
      /**
       *           Test convergence
       **/
      if( abs( e_1( ll ) )<=thresh )
	e_1( ll ) = zero;
      /**
       *           Update singular vectors if desired
       **/
      if( ncvt>0 )
	dlasr( 'l', 'v', 'b', m-ll+1, ncvt, &work_1( nm12+1 ),
	      &work_1( nm13+1 ), &vt_2( ll, 1 ), ldvt );
      if( nru>0 )
	dlasr( 'r', 'v', 'b', nru, m-ll+1, &work_1( 1 ),
	      &work_1( n ), &u_2( 1, ll ), ldu );
      if( ncc>0 )
	dlasr( 'l', 'v', 'b', m-ll+1, ncc, &work_1( 1 ),
	      &work_1( n ), &c_2( ll, 1 ), ldc );
    }
  }
  /**
   *     QR iteration finished, go back and check convergence
   **/
  goto L_50;
  /**
   *     All singular values converged, so make them positive
   **/
 L_190:
  for (i=1 ; i<=n ; i+=1) {
    if( d_1( i )<zero ) {
      d_1( i ) = -d_1( i );
      /**
       *           Change sign of singular vectors, if desired
       **/
      if( ncvt>0 )
	dscal( ncvt, negone, &vt_2( i, 1 ), ldvt );
    }
  }
  /**
   *     Sort the singular values into decreasing order (insertion sort on
   *     singular values, but only one transposition per singular vector)
   **/
  for (i=1 ; i<=n - 1 ; i+=1) {
    /**
     *        Scan for smallest D(I)
     **/
    isub = 1;
    smin = d_1( 1 );
    for (j=2 ; j<=n + 1 - i ; j+=1) {
      if( d_1( j )<=smin ) {
	isub = j;
	smin = d_1( j );
      }
    }
    if( isub!=n+1-i ) {
      /**
       *           Swap singular values and vectors
       **/
      d_1( isub ) = d_1( n+1-i );
      d_1( n+1-i ) = smin;
      if( ncvt>0 )
	dswap( ncvt, &vt_2( isub, 1 ), ldvt, &vt_2( n+1-i, 1 ),
	      ldvt );
      if( nru>0 )
	dswap( nru, &u_2( 1, isub ), 1, &u_2( 1, n+1-i ), 1 );
      if( ncc>0 )
	dswap( ncc, &c_2( isub, 1 ), ldc, &c_2( n+1-i, 1 ), ldc );
    }
  }
  goto L_250;
  /**
   *     Maximum number of iterations exceeded, failure to converge
   **/
 L_230:
  *info = 0;
  for (i=1 ; i<=n - 1 ; i+=1) {
    if( e_1( i )!=zero )
      *info = *info + 1;
  }
 L_250:
  return;
  /**
   *     End of DBDSQR
   **/
}



void dlas2( double f, double g, double h, double *ssmin, double *ssmax )
{
  /**
   *  -- LAPACK auxiliary routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     March 31, 1993
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DLAS2  computes the singular values of the 2-by-2 matrix
   *     [  F   G  ]
   *     [  0   H  ].
   *  On return, SSMIN is the smaller singular value and SSMAX is the
   *  larger singular value.
   *
   *  Arguments
   *  =========
   *
   *  F       (input) DOUBLE PRECISION
   *          The (1,1) entry of the 2-by-2 matrix.
   *
   *  G       (input) DOUBLE PRECISION
   *          The (1,2) entry of the 2-by-2 matrix.
   *
   *  H       (input) DOUBLE PRECISION
   *          The (2,2) entry of the 2-by-2 matrix.
   *
   *  SSMIN   (output) DOUBLE PRECISION
   *          The smaller singular value.
   *
   *  SSMAX   (output) DOUBLE PRECISION
   *          The larger singular value.
   *
   *  Further Details
   *  ===============
   *
   *  Barring over/underflow, all output quantities are correct to within
   *  a few units in the last place (ulps), even in the absence of a guard
   *  digit in addition/subtraction.
   *
   *  In IEEE arithmetic, the code works correctly if one matrix entry is
   *  infinite.
   *
   *  Overflow will not occur unless the largest singular value itself
   *  overflows, or is within a few ulps of overflow. (On machines with
   *  partial overflow, like the Cray, overflow may occur if the largest
   *  singular value is within a factor of 2 of overflow.)
   *
   *  Underflow is harmless if underflow is gradual. Otherwise, results
   *  may correspond to a matrix modified by perturbations of size near
   *  the underflow threshold.
   *
   *  ====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e0
#undef one
#define one 1.0e0
#undef two
#define two 2.0e0
  /**     ..
   *     .. Local Scalars ..*/
  double    as, at, au, c, fa, fhmn, fhmx, ga, ha;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          abs, max, min, sqrt;*/
  /**     ..
   *     .. Executable Statements ..
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  fa = abs( f );
  ga = abs( g );
  ha = abs( h );
  fhmn = min( fa, ha );
  fhmx = max( fa, ha );
  if( fhmn==zero ) {
    *ssmin = zero;
    if( fhmx==zero ) {
      *ssmax = zero;
    } else {
      long min_max= ( min( fhmx, ga ) / max( fhmx, ga ) );
      *ssmax = max( fhmx, ga )*sqrt( one+min_max*min_max );
    }
  } else {
    if( ga<fhmx ) {
      as = one + fhmn / fhmx;
      at = ( fhmx-fhmn ) / fhmx;
      au = ( ga / fhmx )*( ga / fhmx );
      c = two / ( sqrt( as*as+au )+sqrt( at*at+au ) );
      *ssmin = fhmn*c;
      *ssmax = fhmx / c;
    } else {
      au = fhmx / ga;
      if( au==zero ) {
	/**
	 *              Avoid possible harmful underflow if exponent range
	 *              asymmetric (true SSMIN may not underflow even if
	 *              AU underflows)
	 **/
	*ssmin = ( fhmn*fhmx ) / ga;
	*ssmax = ga;
      } else {
	as = one + fhmn / fhmx;
	at = ( fhmx-fhmn ) / fhmx;
	c = one / ( sqrt( one+( as*au )*( as*au ) )+
                   sqrt( one+( at*au )*( at*au ) ) );
	*ssmin = ( fhmn*c )*au;
	*ssmin = *ssmin + *ssmin;
	*ssmax = ga / ( c+c );
      }
    }
  }
  return;
  /**
   *     End of DLAS2
   **/
}



void dlasv2( double f, double g, double h,
	    double *ssmin, double *ssmax, double *snr,
	    double *csr, double *snl, double *csl )
{
  /**
   *  -- LAPACK auxiliary routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     October 31, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DLASV2 computes the singular value decomposition of a 2-by-2
   *  triangular matrix
   *     [  F   G  ]
   *     [  0   H  ].
   *  On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the
   *  smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and
   *  right singular vectors for abs(SSMAX), giving the decomposition
   *
   *     [ CSL  SNL ] [  F   G  ] [ CSR -SNR ]  =  [ SSMAX   0   ]
   *     [-SNL  CSL ] [  0   H  ] [ SNR  CSR ]     [  0    SSMIN ].
   *
   *  Arguments
   *  =========
   *
   *  F       (input) DOUBLE PRECISION
   *          The (1,1) entry of the 2-by-2 matrix.
   *
   *  G       (input) DOUBLE PRECISION
   *          The (1,2) entry of the 2-by-2 matrix.
   *
   *  H       (input) DOUBLE PRECISION
   *          The (2,2) entry of the 2-by-2 matrix.
   *
   *  SSMIN   (output) DOUBLE PRECISION
   *          abs(SSMIN) is the smaller singular value.
   *
   *  SSMAX   (output) DOUBLE PRECISION
   *          abs(SSMAX) is the larger singular value.
   *
   *  SNL     (output) DOUBLE PRECISION
   *  CSL     (output) DOUBLE PRECISION
   *          The vector (CSL, SNL) is a unit left singular vector for the
   *          singular value abs(SSMAX).
   *
   *  SNR     (output) DOUBLE PRECISION
   *  CSR     (output) DOUBLE PRECISION
   *          The vector (CSR, SNR) is a unit right singular vector for the
   *          singular value abs(SSMAX).
   *
   *  Further Details
   *  ===============
   *
   *  Any input parameter may be aliased with any output parameter.
   *
   *  Barring over/underflow and assuming a guard digit in subtraction, all
   *  output quantities are correct to within a few units in the last
   *  place (ulps).
   *
   *  In IEEE arithmetic, the code works correctly if one matrix entry is
   *  infinite.
   *
   *  Overflow will not occur unless the largest singular value itself
   *  overflows or is within a few ulps of overflow. (On machines with
   *  partial overflow, like the Cray, overflow may occur if the largest
   *  singular value is within a factor of 2 of overflow.)
   *
   *  Underflow is harmless if underflow is gradual. Otherwise, results
   *  may correspond to a matrix modified by perturbations of size near
   *  the underflow threshold.
   *
   * =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e0
#undef half
#define half 0.5e0
#undef one
#define one 1.0e0
#undef two
#define two 2.0e0
#undef four
#define four 4.0e0
  /**     ..
   *     .. Local Scalars ..*/
  int            gasmal, swap;
  long            pmax;
  double    a, clt=0.0, crt=0.0, d, fa, ft, ga, gt, ha, ht, l, m,
            mm, r, s, slt=0.0, srt=0.0, t, temp, tsign=0.0, tt;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          abs, sign, sqrt;*/
  /**     ..
   *     .. Executable Statements ..
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  ft = f;
  fa = abs( ft );
  ht = h;
  ha = abs( h );
  /**
   *     PMAX points to the maximum absolute entry of matrix
   *       PMAX = 1 if F largest in absolute values
   *       PMAX = 2 if G largest in absolute values
   *       PMAX = 3 if H largest in absolute values
   **/
  pmax = 1;
  swap = ( ha>fa );
  if( swap ) {
    pmax = 3;
    temp = ft;
    ft = ht;
    ht = temp;
    temp = fa;
    fa = ha;
    ha = temp;
    /**
     *        Now FA .ge. HA
     **/
  }
  gt = g;
  ga = abs( gt );
  if( ga==zero ) {
    /**
     *        Diagonal matrix
     **/
    *ssmin = ha;
    *ssmax = fa;
    clt = one;
    crt = one;
    slt = zero;
    srt = zero;
  } else {
    gasmal = 1;
    if( ga>fa ) {
      pmax = 2;
      if( ( fa / ga )<dlamch( 'e'/*ps*/ ) ) {
	/**
	 *              Case of very large GA
	 **/
	gasmal = 0;
	*ssmax = ga;
	if( ha>one ) {
	  *ssmin = fa / ( ga / ha );
	} else {
	  *ssmin = ( fa / ga )*ha;
	}
	clt = one;
	slt = ht / gt;
	srt = one;
	crt = ft / gt;
      }
    }
    if( gasmal ) {
      /**
       *           Normal case
       **/
      d = fa - ha;
      if( d==fa ) {
	/**
	 *              Copes with infinite F or H
	 **/
	l = one;
      } else {
	l = d / fa;
      }
      /**
       *           Note that 0 .le. L .le. 1
       **/
      m = gt / ft;
      /**
       *           Note that abs(M) .le. 1/macheps
       **/
      t = two - l;
      /**
       *           Note that T .ge. 1
       **/
      mm = m*m;
      tt = t*t;
      s = sqrt( tt+mm );
      /**
       *           Note that 1 .le. S .le. 1 + 1/macheps
       **/
      if( l==zero ) {
	r = abs( m );
      } else {
	r = sqrt( l*l+mm );
      }
      /**
       *           Note that 0 .le. R .le. 1 + 1/macheps
       **/
      a = half*( s+r );
      /**
       *           Note that 1 .le. A .le. 1 + abs(M)
       **/
      *ssmin = ha / a;
      *ssmax = fa*a;
      if( mm==zero ) {
	/**
	 *              Note that M is very tiny
	 **/
	if( l==zero ) {
	  t = sign( two, ft )*sign( one, gt );
	} else {
	  t = gt / sign( d, ft ) + m / t;
	}
      } else {
	t = ( m / ( s+t )+m / ( r+l ) )*( one+a );
      }
      l = sqrt( t*t+four );
      crt = two / l;
      srt = t / l;
      clt = ( crt+srt*m ) / a;
      slt = ( ht / ft )*srt / a;
    }
  }
  if( swap ) {
    *csl = srt;
    *snl = crt;
    *csr = slt;
    *snr = clt;
  } else {
    *csl = clt;
    *snl = slt;
    *csr = crt;
    *snr = srt;
  }
  /**
   *     Correct signs of SSMAX and SSMIN
   **/
  if( pmax==1 )
    tsign = sign( one, *csr )*sign( one, *csl )*sign( one, f );
  if( pmax==2 )
    tsign = sign( one, *snr )*sign( one, *csl )*sign( one, g );
  if( pmax==3 )
    tsign = sign( one, *snr )*sign( one, *snl )*sign( one, h );
  *ssmax = sign( *ssmax, tsign );
  *ssmin = sign( *ssmin, tsign*sign( one, f )*sign( one, h ) );
  return;
  /**
   *     End of DLASV2
   **/
}



void dlasr( char side, char pivot, char direct, long m, long n,
	   double c[], double s[], double a[], long lda )
{
  /**
   *  -- LAPACK auxiliary routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     October 31, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef s_1
#define s_1(a1) s[a1-1]
#undef c_1
#define c_1(a1) c[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DLASR   performs the transformation
   *
   *     A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
   *
   *     A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
   *
   *  where A is an m by n real matrix and P is an orthogonal matrix,
   *  consisting of a sequence of plane rotations determined by the
   *  parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
   *  and z = n when SIDE = 'R' or 'r' ):
   *
   *  When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
   *
   *     P = P( z - 1 )*...*P( 2 )*P( 1 ),
   *
   *  and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
   *
   *     P = P( 1 )*P( 2 )*...*P( z - 1 ),
   *
   *  where  P( k ) is a plane rotation matrix for the following planes:
   *
   *     when  PIVOT = 'V' or 'v'  ( Variable pivot ),
   *        the plane ( k, k + 1 )
   *
   *     when  PIVOT = 'T' or 't'  ( Top pivot ),
   *        the plane ( 1, k + 1 )
   *
   *     when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
   *        the plane ( k, z )
   *
   *  c_1( k ) and s_1( k )  must contain the  cosine and sine that define the
   *  matrix  P( k ).  The two by two plane rotation part of the matrix
   *  P( k ), R( k ), is assumed to be of the form
   *
   *     R( k ) = (  c_1( k )  s_1( k ) ).
   *              ( -s_1( k )  c_1( k ) )
   *
   *  This version vectorises across rows of the array A when SIDE = 'L'.
   *
   *  Arguments
   *  =========
   *
   *  SIDE    (input) CHARACTER*1
   *          Specifies whether the plane rotation matrix P is applied to
   *          A on the left or the right.
   *          = 'L':  Left, compute A := P*A
   *          = 'R':  Right, compute A:= A*P'
   *
   *  DIRECT  (input) CHARACTER*1
   *          Specifies whether P is a forward or backward sequence of
   *          plane rotations.
   *          = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
   *          = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
   *
   *  PIVOT   (input) CHARACTER*1
   *          Specifies the plane for which P(k) is a plane rotation
   *          matrix.
   *          = 'V':  Variable pivot, the plane (k,k+1)
   *          = 'T':  Top pivot, the plane (1,k+1)
   *          = 'B':  Bottom pivot, the plane (k,z)
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix A.  If m <= 1, an immediate
   *          return is effected.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix A.  If n <= 1, an
   *          immediate return is effected.
   *
   *  C, S    (input) DOUBLE PRECISION arrays, dimension
   *                  (M-1) if SIDE = 'L'
   *                  (N-1) if SIDE = 'R'
   *          c_1(k) and s_1(k) contain the cosine and sine that define the
   *          matrix P(k).  The two by two plane rotation part of the
   *          matrix P(k), R(k), is assumed to be of the form
   *          R( k ) = (  c_1( k )  s_1( k ) ).
   *                   ( -s_1( k )  c_1( k ) )
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          The m by n matrix A.  On exit, A is overwritten by P*A if
   *          SIDE = 'R' or by A*P' if SIDE = 'L'.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.  LDA >= max(1,M).
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef one
#define one 1.0e+0
#undef zero
#define zero 0.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  long            i, info, j;
  double    ctemp, stemp, temp;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input parameters
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  info = 0;
  if( !( lsame( side, 'l' ) || lsame( side, 'r' ) ) ) {
    info = 1;
  } else if( !( lsame( pivot, 'v' ) || lsame( pivot,
					     't' ) || lsame( pivot, 'b' ) ) ) {
    info = 2;
  } else if( !( lsame( direct, 'f' ) || lsame( direct, 'b' ) ) )
    {
      info = 3;
    } else if( m<0 ) {
      info = 4;
    } else if( n<0 ) {
      info = 5;
    } else if( lda<max( 1, m ) ) {
      info = 9;
    }
  if( info!=0 ) {
    xerbla( "dlasr ", info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( ( m==0 ) || ( n==0 ) )
    return;
  if( lsame( side, 'l' ) ) {
    /**
     *        Form  P * A
     **/
    if( lsame( pivot, 'v' ) ) {
      if( lsame( direct, 'f' ) ) {
	for (j=1 ; j<=m - 1 ; j+=1) {
	  ctemp = c_1( j );
	  stemp = s_1( j );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=n ; i+=1) {
	      temp = a_2( j+1, i );
	      a_2( j+1, i ) = ctemp*temp - stemp*a_2( j, i );
	      a_2( j, i ) = stemp*temp + ctemp*a_2( j, i );
	    }
	  }
	}
      } else if( lsame( direct, 'b' ) ) {
	for (j=m - 1 ; j>=1 ; j+=-1) {
	  ctemp = c_1( j );
	  stemp = s_1( j );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=n ; i+=1) {
	      temp = a_2( j+1, i );
	      a_2( j+1, i ) = ctemp*temp - stemp*a_2( j, i );
	      a_2( j, i ) = stemp*temp + ctemp*a_2( j, i );
	    }
	  }
	}
      }
    } else if( lsame( pivot, 't' ) ) {
      if( lsame( direct, 'f' ) ) {
	for (j=2 ; j<=m ; j+=1) {
	  ctemp = c_1( j-1 );
	  stemp = s_1( j-1 );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=n ; i+=1) {
	      temp = a_2( j, i );
	      a_2( j, i ) = ctemp*temp - stemp*a_2( 1, i );
	      a_2( 1, i ) = stemp*temp + ctemp*a_2( 1, i );
	    }
	  }
	}
      } else if( lsame( direct, 'b' ) ) {
	for (j=m ; j>=2 ; j+=-1) {
	  ctemp = c_1( j-1 );
	  stemp = s_1( j-1 );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=n ; i+=1) {
	      temp = a_2( j, i );
	      a_2( j, i ) = ctemp*temp - stemp*a_2( 1, i );
	      a_2( 1, i ) = stemp*temp + ctemp*a_2( 1, i );
	    }
	  }
	}
      }
    } else if( lsame( pivot, 'b' ) ) {
      if( lsame( direct, 'f' ) ) {
	for (j=1 ; j<=m - 1 ; j+=1) {
	  ctemp = c_1( j );
	  stemp = s_1( j );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=n ; i+=1) {
	      temp = a_2( j, i );
	      a_2( j, i ) = stemp*a_2( m, i ) + ctemp*temp;
	      a_2( m, i ) = ctemp*a_2( m, i ) - stemp*temp;
	    }
	  }
	}
      } else if( lsame( direct, 'b' ) ) {
	for (j=m - 1 ; j>=1 ; j+=-1) {
	  ctemp = c_1( j );
	  stemp = s_1( j );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=n ; i+=1) {
	      temp = a_2( j, i );
	      a_2( j, i ) = stemp*a_2( m, i ) + ctemp*temp;
	      a_2( m, i ) = ctemp*a_2( m, i ) - stemp*temp;
	    }
	  }
	}
      }
    }
  } else if( lsame( side, 'r' ) ) {
    /**
     *        Form A * P'
     **/
    if( lsame( pivot, 'v' ) ) {
      if( lsame( direct, 'f' ) ) {
	for (j=1 ; j<=n - 1 ; j+=1) {
	  ctemp = c_1( j );
	  stemp = s_1( j );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=m ; i+=1) {
	      temp = a_2( i, j+1 );
	      a_2( i, j+1 ) = ctemp*temp - stemp*a_2( i, j );
	      a_2( i, j ) = stemp*temp + ctemp*a_2( i, j );
	    }
	  }
	}
      } else if( lsame( direct, 'b' ) ) {
	for (j=n - 1 ; j>=1 ; j+=-1) {
	  ctemp = c_1( j );
	  stemp = s_1( j );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=m ; i+=1) {
	      temp = a_2( i, j+1 );
	      a_2( i, j+1 ) = ctemp*temp - stemp*a_2( i, j );
	      a_2( i, j ) = stemp*temp + ctemp*a_2( i, j );
	    }
	  }
	}
      }
    } else if( lsame( pivot, 't' ) ) {
      if( lsame( direct, 'f' ) ) {
	for (j=2 ; j<=n ; j+=1) {
	  ctemp = c_1( j-1 );
	  stemp = s_1( j-1 );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=m ; i+=1) {
	      temp = a_2( i, j );
	      a_2( i, j ) = ctemp*temp - stemp*a_2( i, 1 );
	      a_2( i, 1 ) = stemp*temp + ctemp*a_2( i, 1 );
	    }
	  }
	}
      } else if( lsame( direct, 'b' ) ) {
	for (j=n ; j>=2 ; j+=-1) {
	  ctemp = c_1( j-1 );
	  stemp = s_1( j-1 );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=m ; i+=1) {
	      temp = a_2( i, j );
	      a_2( i, j ) = ctemp*temp - stemp*a_2( i, 1 );
	      a_2( i, 1 ) = stemp*temp + ctemp*a_2( i, 1 );
	    }
	  }
	}
      }
    } else if( lsame( pivot, 'b' ) ) {
      if( lsame( direct, 'f' ) ) {
	for (j=1 ; j<=n - 1 ; j+=1) {
	  ctemp = c_1( j );
	  stemp = s_1( j );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=m ; i+=1) {
	      temp = a_2( i, j );
	      a_2( i, j ) = stemp*a_2( i, n ) + ctemp*temp;
	      a_2( i, n ) = ctemp*a_2( i, n ) - stemp*temp;
	    }
	  }
	}
      } else if( lsame( direct, 'b' ) ) {
	for (j=n - 1 ; j>=1 ; j+=-1) {
	  ctemp = c_1( j );
	  stemp = s_1( j );
	  if( ( ctemp!=one ) || ( stemp!=zero ) ) {
	    for (i=1 ; i<=m ; i+=1) {
	      temp = a_2( i, j );
	      a_2( i, j ) = stemp*a_2( i, n ) + ctemp*temp;
	      a_2( i, n ) = ctemp*a_2( i, n ) - stemp*temp;
	    }
	  }
	}
      }
    }
  }

  return;
  /**
   *     End of DLASR
   **/
}



void dlartg( double f, double g, double *cs, double *sn, double *r )
{
  /**
   *  -- LAPACK auxiliary routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     October 31, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DLARTG generate a plane rotation so that
   *
   *     [  CS  SN  ]  .  [ F ]  =  [ R ]   where CS**2 + SN**2 = 1.
   *     [ -SN  CS  ]     [ G ]     [ 0 ]
   *
   *  This is a faster version of the BLAS1 routine DROTG, except for
   *  the following differences:
   *     F and G are unchanged on return.
   *     If G=0, then CS=1 and SN=0.
   *     If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any
   *        floating point operations (saves work in DBDSQR when
   *        there are zeros on the diagonal).
   *
   *  Arguments
   *  =========
   *
   *  F       (input) DOUBLE PRECISION
   *          The first component of vector to be rotated.
   *
   *  G       (input) DOUBLE PRECISION
   *          The second component of vector to be rotated.
   *
   *  CS      (output) DOUBLE PRECISION
   *          The cosine of the rotation.
   *
   *  SN      (output) DOUBLE PRECISION
   *          The sine of the rotation.
   *
   *  R       (output) DOUBLE PRECISION
   *          The nonzero component of the rotated vector.
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e0
#undef one
#define one 1.0e0
  /**     ..
   *     .. Local Scalars ..*/
  double    t, tt;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          abs, sqrt;*/
  /**     ..
   *     .. Executable Statements ..
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  if( g==zero ) {
    *cs = one;
    *sn = zero;
    *r = f;
  } else if( f==zero ) {
    *cs = zero;
    *sn = one;
    *r = g;
  } else {
    if( abs( f )>abs( g ) ) {
      t = g / f;
      tt = sqrt( one+t*t );
      *cs = one / tt;
      *sn = t*(*cs);
      *r = f*tt;
    } else {
      t = f / g;
      tt = sqrt( one+t*t );
      *sn = one / tt;
      *cs = t*(*sn);
      *r = g*tt;
    }
  }
  return;
  /**
   *     End of DLARTG
   **/
}



void dorgbr( char vect, long m, long n, long k, double a[], long lda,
	    double tau[], double work[], long lwork, long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     March 31, 1993
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef tau_1
#define tau_1(a1) tau[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DORGBR generates one of the matrices Q or P**T determined by DGEBRD
   *  when reducing a real matrix A to bidiagonal form: A = Q * B * P**T.
   *  Q and P**T are defined as products of elementary reflectors H(i) or
   *  G(i) respectively.
   *
   *  If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
   *  is of order M:
   *  if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n
   *  columns of Q, where m >= n >= k;
   *  if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an
   *  M-by-M matrix.
   *
   *  If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
   *  is of order N:
   *  if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m
   *  rows of P**T, where n >= m >= k;
   *  if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as
   *  an N-by-N matrix.
   *
   *  Arguments
   *  =========
   *
   *  VECT    (input) CHARACTER*1
   *          Specifies whether the matrix Q or the matrix P**T is
   *          required, as defined in the transformation applied by DGEBRD:
   *          = 'Q':  generate Q;
   *          = 'P':  generate P**T.
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix Q or P**T to be returned.
   *          M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix Q or P**T to be returned.
   *          N >= 0.
   *          If VECT = 'Q', M >= N >= min(M,K);
   *          if VECT = 'P', N >= M >= min(N,K).
   *
   *  K       (input) INTEGER
   *          K >= 0.
   *          If VECT = 'Q', the number of columns in the original M-by-K
   *          matrix reduced by DGEBRD.
   *          If VECT = 'P', the number of rows in the original K-by-N
   *          matrix reduced by DGEBRD.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the vectors which define the elementary reflectors,
   *          as returned by DGEBRD.
   *          On exit, the M-by-N matrix Q or P**T.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A. LDA >= max(1,M).
   *
   *  TAU     (input) DOUBLE PRECISION array, dimension
   *                                (min(M,K)) if VECT = 'Q'
   *                                (min(N,K)) if VECT = 'P'
   *          TAU(i) must contain the scalar factor of the elementary
   *          reflector H(i) or G(i), which determines Q or P**T, as
   *          returned by DGEBRD in its array argument TAUQ or TAUP.
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
   *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
   *
   *  LWORK   (input) INTEGER
   *          The dimension of the array WORK. LWORK >= max(1,min(M,N)).
   *          For optimum performance LWORK >= min(M,N)*NB, where NB
   *          is the optimal blocksize.
   *
   *  INFO    (output) INTEGER
   *          = 0:  successful exit
   *          < 0:  if INFO = -i, the i-th argument had an illegal value
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e+0
#undef one
#define one 1.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  int            wantq;
  long            i, iinfo, j;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max, min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input arguments
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  wantq = lsame( vect, 'q' );
  if( !wantq && !lsame( vect, 'p' ) ) {
    *info = -1;
  } else if( m<0 ) {
    *info = -2;
  } else if( n<0 || ( wantq && ( n>m || n<min( m,
					      k ) ) ) || ( !wantq && ( m>n || m<
								      min( n, k ) ) ) ) {
    *info = -3;
  } else if( k<0 ) {
    *info = -4;
  } else if( lda<max( 1, m ) ) {
    *info = -6;
  } else if( lwork<max( 1, min( m, n ) ) ) {
    *info = -9;
  }
  if( *info!=0 ) {
    xerbla( "dorgbr", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( m==0 || n==0 ) {
    work_1( 1 ) = 1;
    return;
  }

  if( wantq ) {
    /**
     *        Form Q, determined by a call to DGEBRD to reduce an m-by-k
     *        matrix
     **/
    if( m>=k ) {
      /**
       *           If m >= k, assume m >= n >= k
       **/
      dorgqr( m, n, k, a, lda, tau, work, lwork, &iinfo );

    } else {
      /**
       *           If m < k, assume *m = n
       *
       *           Shift the vectors which define the elementary reflectors one
       *           column to the right, and set the first row and column of Q
       *           to those of the unit matrix
       **/
      for (j=m ; j>=2 ; j+=-1) {
	a_2( 1, j ) = zero;
	for (i=j + 1 ; i<=m ; i+=1) {
	  a_2( i, j ) = a_2( i, j-1 );
	}
      }
      a_2( 1, 1 ) = one;
      for (i=2 ; i<=m ; i+=1) {
	a_2( i, 1 ) = zero;
      }
      if( m>1 ) {
	/**
	 *              Form Q(2:m,2:m)
	 **/
	dorgqr( m-1, m-1, m-1, &a_2( 2, 2 ), lda, tau, work,
	       lwork, &iinfo );
      }
    }
  } else {
    /**
     *        Form P', determined by a call to DGEBRD to reduce a k-by-n
     *        matrix
     **/
    if( k<n ) {
      /**
       *           If k < n, assume k <= m <= n
       **/
      dorglq( m, n, k, a, lda, tau, work, lwork, &iinfo );

    } else {
      /**
       *           If k >= n, assume *m = n
       *
       *           Shift the vectors which define the elementary reflectors one
       *           row downward, and set the first row and column of P' to
       *           those of the unit matrix
       **/
      a_2( 1, 1 ) = one;
      for (i=2 ; i<=n ; i+=1) {
	a_2( i, 1 ) = zero;
      }
      for (j=2 ; j<=n ; j+=1) {
	for (i=j - 1 ; i>=2 ; i+=-1) {
	  a_2( i, j ) = a_2( i-1, j );
	}
	a_2( 1, j ) = zero;
      }
      if( n>1 ) {
	/**
	 *              Form P'(2:n,2:n)
	 **/
	dorglq( n-1, n-1, n-1, &a_2( 2, 2 ), lda, tau, work,
	       lwork, &iinfo );
      }
    }
  }
  return;
  /**
   *     End of DORGBR
   **/
}



void dorglq( long m, long n, long k, double a[], long lda,
	    double tau[], double work[], long lwork, long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     March 31, 1993
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef tau_1
#define tau_1(a1) tau[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DORGLQ generates an M-by-N real matrix Q with orthonormal rows,
   *  which is defined as the first M rows of a product of K elementary
   *  reflectors of order N
   *
   *        Q  =  H(k) . . . H(2) H(1)
   *
   *  as returned by DGELQF.
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix Q. M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix Q. N >= M.
   *
   *  K       (input) INTEGER
   *          The number of elementary reflectors whose product defines the
   *          matrix Q. M >= K >= 0.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the i-th row must contain the vector which defines
   *          the elementary reflector H(i), for i = 1,2,...,k, as returned
   *          by DGELQF in the first k rows of its array argument A.
   *          On exit, the M-by-N matrix Q.
   *
   *  LDA     (input) INTEGER
   *          The first dimension of the array A. LDA >= max(1,M).
   *
   *  TAU     (input) DOUBLE PRECISION array, dimension (K)
   *          TAU(i) must contain the scalar factor of the elementary
   *          reflector H(i), as returned by DGELQF.
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
   *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
   *
   *  LWORK   (input) INTEGER
   *          The dimension of the array WORK. LWORK >= max(1,M).
   *          For optimum performance LWORK >= M*NB, where NB is
   *          the optimal blocksize.
   *
   *  INFO    (output) INTEGER
   *          = 0:  successful exit
   *          < 0:  if INFO = -i, the i-th argument has an illegal value
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  long    i, ib, iinfo, iws, j, ki=0, kk, l, ldwork=0, nb,
          nbmin, nx;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max, min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input arguments
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  if( m<0 ) {
    *info = -1;
  } else if( n<m ) {
    *info = -2;
  } else if( k<0 || k>m ) {
    *info = -3;
  } else if( lda<max( 1, m ) ) {
    *info = -5;
  } else if( lwork<max( 1, m ) ) {
    *info = -8;
  }
  if( *info!=0 ) {
    xerbla( "dorglq", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( m<=0 ) {
    work_1( 1 ) = 1;
    return;
  }
  /**
   *     Determine the block size.
   **/
  nb = ilaenv( 1, "dorglq", " ", m, n, k, -1 );
  nbmin = 2;
  nx = 0;
  iws = m;
  if( nb>1 && nb<k ) {
    /**
     *        Determine when to cross over from blocked to unblocked code.
     **/
    nx = max( 0, ilaenv( 3, "dorglq", " ", m, n, k, -1 ) );
    if( nx<k ) {
      /**
       *           Determine if workspace is large enough for blocked code.
       **/
      ldwork = m;
      iws = ldwork*nb;
      if( lwork<iws ) {
	/**
	 *              Not enough workspace to use optimal NB:  reduce NB and
	 *              determine the minimum value of NB.
	 **/
	nb = lwork / ldwork;
	nbmin = max( 2, ilaenv( 2, "dorglq", " ", m, n, k, -1 ) );
      }
    }
  }

  if( nb>=nbmin && nb<k && nx<k ) {
    /**
     *        Use blocked code after the last block.
     *        The first kk rows are handled by the block method.
     **/
    ki = ( ( k-nx-1 ) / nb )*nb;
    kk = min( k, ki+nb );
    /**
     *        Set A(kk+1:m,1:kk) to zero.
     **/
    for (j=1 ; j<=kk ; j+=1) {
      for (i=kk + 1 ; i<=m ; i+=1) {
	a_2( i, j ) = zero;
      }
    }
  } else {
    kk = 0;
  }
  /**
   *     Use unblocked code for the last or only block.
   **/
  if( kk<m )
    dorgl2( m-kk, n-kk, k-kk, &a_2( kk+1, kk+1 ), lda,
	   &tau_1( kk+1 ), work, &iinfo );

  if( kk>0 ) {
    /**
     *        Use blocked code
     **/
    for (i=ki + 1 ; -nb>0?i<=1:i>=1 ; i+=-nb) {
      ib = min( nb, k-i+1 );
      if( i+ib<=m ) {
	/**
	 *              Form the triangular factor of the block reflector
	 *              H = H(i) H(i+1) . . . H(i+ib-1)
	 **/
	dlarft( 'f'/*orward*/, 'r'/*owwise*/, n-i+1, ib, &a_2( i, i ),
	       lda, &tau_1( i ), work, ldwork );
	/**
	 *              Apply H' to A(i+ib:m,i:n) from the right
	 **/
	dlarfb( 'r'/*ight*/, 't'/*ranspose*/, 'f'/*orward*/, 'r'/*owwise*/,
	       m-i-ib+1, n-i+1, ib, &a_2( i, i ), lda, work,
	       ldwork, &a_2( i+ib, i ), lda, &work_1( ib+1 ),
	       ldwork );
      }
      /**
       *           Apply H' to columns i:n of current block
       **/
      dorgl2( ib, n-i+1, ib, &a_2( i, i ), lda, &tau_1( i ), work,
	     &iinfo );
      /**
       *           Set columns 1:i-1 of current block to zero
       **/
      for (j=1 ; j<=i - 1 ; j+=1) {
	for (l=i ; l<=i + ib - 1 ; l+=1) {
	  a_2( l, j ) = zero;
	}
      }
    }
  }

  work_1( 1 ) = iws;
  return;
  /**
   *     End of DORGLQ
   **/
}



void dorgl2( long m, long n, long k, double a[], long lda, double tau[],
	    double work[], long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     February 29, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef tau_1
#define tau_1(a1) tau[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DORGL2 generates an m by n real matrix Q with orthonormal rows,
   *  which is defined as the first m rows of a product of k elementary
   *  reflectors of order n
   *
   *        Q  =  H(k) . . . H(2) H(1)
   *
   *  as returned by DGELQF.
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix Q. M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix Q. N >= M.
   *
   *  K       (input) INTEGER
   *          The number of elementary reflectors whose product defines the
   *          matrix Q. M >= K >= 0.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the i-th row must contain the vector which defines
   *          the elementary reflector H(i), for i = 1,2,...,k, as returned
   *          by DGELQF in the first k rows of its array argument A.
   *          On exit, the m-by-n matrix Q.
   *
   *  LDA     (input) INTEGER
   *          The first dimension of the array A. LDA >= max(1,M).
   *
   *  TAU     (input) DOUBLE PRECISION array, dimension (K)
   *          TAU(i) must contain the scalar factor of the elementary
   *          reflector H(i), as returned by DGELQF.
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension (M)
   *
   *  INFO    (output) INTEGER
   *          = 0: successful exit
   *          < 0: if INFO = -i, the i-th argument has an illegal value
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef one
#define one 1.0e+0
#undef zero
#define zero 0.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  long            i, j, l;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input arguments
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  if( m<0 ) {
    *info = -1;
  } else if( n<m ) {
    *info = -2;
  } else if( k<0 || k>m ) {
    *info = -3;
  } else if( lda<max( 1, m ) ) {
    *info = -5;
  }
  if( *info!=0 ) {
    xerbla( "dorgl2", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( m<=0 )
    return;

  if( k<m ) {
    /**
     *        Initialise rows k+1:m to rows of the unit matrix
     **/
    for (j=1 ; j<=n ; j+=1) {
      for (l=k + 1 ; l<=m ; l+=1) {
	a_2( l, j ) = zero;
      }
      if( j>k && j<=m )
	a_2( j, j ) = one;
    }
  }

  for (i=k ; i>=1 ; i+=-1) {
    /**
     *        Apply H(i) to A(i:m,i:n) from the right
     **/
    if( i<n ) {
      if( i<m ) {
	a_2( i, i ) = one;
	dlarf( 'r'/*ight*/, m-i, n-i+1, &a_2( i, i ), lda,
	      tau_1( i ), &a_2( i+1, i ), lda, work );
      }
      dscal( n-i, -tau_1( i ), &a_2( i, i+1 ), lda );
    }
    a_2( i, i ) = one - tau_1( i );
    /**
     *        Set A(1:i-1,i) to zero
     **/
    for (l=1 ; l<=i - 1 ; l+=1) {
      a_2( i, l ) = zero;
    }
  }
  return;
  /**
   *     End of DORGL2
   **/
}



void dorgqr( long m, long n, long k, double a[], long lda,
	    double tau[], double work[], long lwork, long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     March 31, 1993
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef tau_1
#define tau_1(a1) tau[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DORGQR generates an M-by-N real matrix Q with orthonormal columns,
   *  which is defined as the first N columns of a product of K elementary
   *  reflectors of order M
   *
   *        Q  =  H(1) H(2) . . . H(k)
   *
   *  as returned by DGEQRF.
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix Q. M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix Q. M >= N >= 0.
   *
   *  K       (input) INTEGER
   *          The number of elementary reflectors whose product defines the
   *          matrix Q. N >= K >= 0.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the i-th column must contain the vector which
   *          defines the elementary reflector H(i), for i = 1,2,...,k, as
   *          returned by DGEQRF in the first k columns of its array
   *          argument A.
   *          On exit, the M-by-N matrix Q.
   *
   *  LDA     (input) INTEGER
   *          The first dimension of the array A. LDA >= max(1,M).
   *
   *  TAU     (input) DOUBLE PRECISION array, dimension (K)
   *          TAU(i) must contain the scalar factor of the elementary
   *          reflector H(i), as returned by DGEQRF.
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
   *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
   *
   *  LWORK   (input) INTEGER
   *          The dimension of the array WORK. LWORK >= max(1,N).
   *          For optimum performance LWORK >= N*NB, where NB is the
   *          optimal blocksize.
   *
   *  INFO    (output) INTEGER
   *          = 0:  successful exit
   *          < 0:  if INFO = -i, the i-th argument has an illegal value
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  long     i, ib, iinfo, iws, j, ki=0, kk, l, ldwork=0, nb,
           nbmin, nx;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max, min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input arguments
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  if( m<0 ) {
    *info = -1;
  } else if( n<0 || n>m ) {
    *info = -2;
  } else if( k<0 || k>n ) {
    *info = -3;
  } else if( lda<max( 1, m ) ) {
    *info = -5;
  } else if( lwork<max( 1, n ) ) {
    *info = -8;
  }
  if( *info!=0 ) {
    xerbla( "dorgqr", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( n<=0 ) {
    work_1( 1 ) = 1;
    return;
  }
  /**
   *     Determine the block size.
   **/
  nb = ilaenv( 1, "dorgqr", " ", m, n, k, -1 );
  nbmin = 2;
  nx = 0;
  iws = n;
  if( nb>1 && nb<k ) {
    /**
     *        Determine when to cross over from blocked to unblocked code.
     **/
    nx = max( 0, ilaenv( 3, "dorgqr", " ", m, n, k, -1 ) );
    if( nx<k ) {
      /**
       *           Determine if workspace is large enough for blocked code.
       **/
      ldwork = n;
      iws = ldwork*nb;
      if( lwork<iws ) {
	/**
	 *              Not enough workspace to use optimal NB:  reduce NB and
	 *              determine the minimum value of NB.
	 **/
	nb = lwork / ldwork;
	nbmin = max( 2, ilaenv( 2, "dorgqr", " ", m, n, k, -1 ) );
      }
    }
  }

  if( nb>=nbmin && nb<k && nx<k ) {
    /**
     *        Use blocked code after the last block.
     *        The first kk columns are handled by the block method.
     **/
    ki = ( ( k-nx-1 ) / nb )*nb;
    kk = min( k, ki+nb );
    /**
     *        Set A(1:kk,kk+1:n) to zero.
     **/
    for (j=kk + 1 ; j<=n ; j+=1) {
      for (i=1 ; i<=kk ; i+=1) {
	a_2( i, j ) = zero;
      }
    }
  } else {
    kk = 0;
  }
  /**
   *     Use unblocked code for the last or only block.
   **/
  if( kk<n )
    dorg2r( m-kk, n-kk, k-kk, &a_2( kk+1, kk+1 ), lda,
	   &tau_1( kk+1 ), work, &iinfo );

  if( kk>0 ) {
    /**
     *        Use blocked code
     **/
    for (i=ki + 1 ; -nb>0?i<=1:i>=1 ; i+=-nb) {
      ib = min( nb, k-i+1 );
      if( i+ib<=n ) {
	/**
	 *              Form the triangular factor of the block reflector
	 *              H = H(i) H(i+1) . . . H(i+ib-1)
	 **/
	dlarft( 'f'/*orward*/, 'c'/*olumnwise*/, m-i+1, ib,
	       &a_2( i, i ), lda, &tau_1( i ), work, ldwork );
	/**
	 *              Apply H to A(i:m,i+ib:n) from the left
	 **/
	dlarfb( 'l'/*eft*/, 'n'/*o transpose*/, 'f'/*orward*/,
	       'c'/*olumnwise*/, m-i+1, n-i-ib+1, ib,
	       &a_2( i, i ), lda, work, ldwork, &a_2( i, i+ib ),
	       lda, &work_1( ib+1 ), ldwork );
      }
      /**
       *           Apply H to rows i:m of current block
       **/
      dorg2r( m-i+1, ib, ib, &a_2( i, i ), lda, &tau_1( i ), work,
	     &iinfo );
      /**
       *           Set rows 1:i-1 of current block to zero
       **/
      for (j=i ; j<=i + ib - 1 ; j+=1) {
	for (l=1 ; l<=i - 1 ; l+=1) {
	  a_2( l, j ) = zero;
	}
      }
    }
  }

  work_1( 1 ) = iws;
  return;
  /**
   *     End of DORGQR
   **/
}



void dorg2r( long m, long n, long k, double a[], long lda,
	    double tau[], double work[], long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     February 29, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef tau_1
#define tau_1(a1) tau[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DORG2R generates an m by n real matrix Q with orthonormal columns,
   *  which is defined as the first n columns of a product of k elementary
   *  reflectors of order m
   *
   *        Q  =  H(1) H(2) . . . H(k)
   *
   *  as returned by DGEQRF.
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix Q. M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix Q. M >= N >= 0.
   *
   *  K       (input) INTEGER
   *          The number of elementary reflectors whose product defines the
   *          matrix Q. N >= K >= 0.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the i-th column must contain the vector which
   *          defines the elementary reflector H(i), for i = 1,2,...,k, as
   *          returned by DGEQRF in the first k columns of its array
   *          argument A.
   *          On exit, the m-by-n matrix Q.
   *
   *  LDA     (input) INTEGER
   *          The first dimension of the array A. LDA >= max(1,M).
   *
   *  TAU     (input) DOUBLE PRECISION array, dimension (K)
   *          TAU(i) must contain the scalar factor of the elementary
   *          reflector H(i), as returned by DGEQRF.
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
   *
   *  INFO    (output) INTEGER
   *          = 0: successful exit
   *          < 0: if INFO = -i, the i-th argument has an illegal value
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef one
#define one 1.0e+0
#undef zero
#define zero 0.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  long            i, j, l;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input arguments
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  if( m<0 ) {
    *info = -1;
  } else if( n<0 || n>m ) {
    *info = -2;
  } else if( k<0 || k>n ) {
    *info = -3;
  } else if( lda<max( 1, m ) ) {
    *info = -5;
  }
  if( *info!=0 ) {
    xerbla( "dorg2r", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( n<=0 )
    return;
  /**
   *     Initialise columns k+1:n to columns of the unit matrix
   **/
  for (j=k + 1 ; j<=n ; j+=1) {
    for (l=1 ; l<=m ; l+=1) {
      a_2( l, j ) = zero;
    }
    a_2( j, j ) = one;
  }

  for (i=k ; i>=1 ; i+=-1) {
    /**
     *        Apply H(i) to A(i:m,i:n) from the left
     **/
    if( i<n ) {
      a_2( i, i ) = one;
      dlarf( 'l'/*eft*/, m-i+1, n-i, &a_2( i, i ), 1, tau_1( i ),
	    &a_2( i, i+1 ), lda, work );
    }
    if( i<m )
      dscal( m-i, -tau_1( i ), &a_2( i+1, i ), 1 );
    a_2( i, i ) = one - tau_1( i );
    /**
     *        Set A(1:i-1,i) to zero
     **/
    for (l=1 ; l<=i - 1 ; l+=1) {
      a_2( l, i ) = zero;
    }
  }
  return;
  /**
   *     End of DORG2R
   **/
}



void dormbr( char vect, char side, char trans, long m, long n, long k,
	    double a[], long lda, double tau[], double c[],long ldc,
	    double work[], long lwork, long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     March 31, 1993
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef tau_1
#define tau_1(a1) tau[a1-1]
#undef c_2
#define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C
   *  with
   *                  SIDE = 'L'     SIDE = 'R'
   *  TRANS = 'N':      Q * C          C * Q
   *  TRANS = 'T':      Q**T * C       C * Q**T
   *
   *  If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C
   *  with
   *                  SIDE = 'L'     SIDE = 'R'
   *  TRANS = 'N':      P * C          C * P
   *  TRANS = 'T':      P**T * C       C * P**T
   *
   *  Here Q and P**T are the orthogonal matrices determined by DGEBRD when
   *  reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
   *  P**T are defined as products of elementary reflectors H(i) and G(i)
   *  respectively.
   *
   *  Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
   *  order of the orthogonal matrix Q or P**T that is applied.
   *
   *  If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
   *  if nq >= k, Q = H(1) H(2) . . . H(k);
   *  if nq < k, Q = H(1) H(2) . . . H(nq-1).
   *
   *  If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
   *  if k < nq, P = G(1) G(2) . . . G(k);
   *  if k >= nq, P = G(1) G(2) . . . G(nq-1).
   *
   *  Arguments
   *  =========
   *
   *  VECT    (input) CHARACTER*1
   *          = 'Q': apply Q or Q**T;
   *          = 'P': apply P or P**T.
   *
   *  SIDE    (input) CHARACTER*1
   *          = 'L': apply Q, Q**T, P or P**T from the Left;
   *          = 'R': apply Q, Q**T, P or P**T from the Right.
   *
   *  TRANS   (input) CHARACTER*1
   *          = 'N':  No transpose, apply Q  or P;
   *          = 'T':  Transpose, apply Q**T or P**T.
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix C. M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix C. N >= 0.
   *
   *  K       (input) INTEGER
   *          K >= 0.
   *          If VECT = 'Q', the number of columns in the original
   *          matrix reduced by DGEBRD.
   *          If VECT = 'P', the number of rows in the original
   *          matrix reduced by DGEBRD.
   *
   *  A       (input) DOUBLE PRECISION array, dimension
   *                                (LDA,min(nq,K)) if VECT = 'Q'
   *                                (LDA,nq)        if VECT = 'P'
   *          The vectors which define the elementary reflectors H(i) and
   *          G(i), whose products determine the matrices Q and P, as
   *          returned by DGEBRD.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.
   *          If VECT = 'Q', LDA >= max(1,nq);
   *          if VECT = 'P', LDA >= max(1,min(nq,K)).
   *
   *  TAU     (input) DOUBLE PRECISION array, dimension (min(nq,K))
   *          TAU(i) must contain the scalar factor of the elementary
   *          reflector H(i) or G(i) which determines Q or P, as returned
   *          by DGEBRD in the array argument TAUQ or TAUP.
   *
   *  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
   *          On entry, the M-by-N matrix C.
   *          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q
   *          or P*C or P**T*C or C*P or C*P**T.
   *
   *  LDC     (input) INTEGER
   *          The leading dimension of the array C. LDC >= max(1,M).
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
   *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
   *
   *  LWORK   (input) INTEGER
   *          The dimension of the array WORK.
   *          If SIDE = 'L', LWORK >= max(1,N);
   *          if SIDE = 'R', LWORK >= max(1,M).
   *          For optimum performance LWORK >= N*NB if SIDE = 'L', and
   *          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
   *          blocksize.
   *
   *  INFO    (output) INTEGER
   *          = 0:  successful exit
   *          < 0:  if INFO = -i, the i-th argument had an illegal value
   *
   *  =====================================================================
   *
   *     .. Local Scalars ..*/
  int            applyq, left, notran;
  char          transt;
  long            i1, i2, iinfo, mi, ni, nq, nw;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max, min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input arguments
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  applyq = lsame( vect, 'q' );
  left = lsame( side, 'l' );
  notran = lsame( trans, 'n' );
  /**
   *     NQ is the order of Q or P and NW is the minimum dimension of WORK
   **/
  if( left ) {
    nq = m;
    nw = n;
  } else {
    nq = n;
    nw = m;
  }
  if( !applyq && !lsame( vect, 'p' ) ) {
    *info = -1;
  } else if( !left && !lsame( side, 'r' ) ) {
    *info = -2;
  } else if( !notran && !lsame( trans, 't' ) ) {
    *info = -3;
  } else if( m<0 ) {
    *info = -4;
  } else if( n<0 ) {
    *info = -5;
  } else if( k<0 ) {
    *info = -6;
  } else if( ( applyq && lda<max( 1, nq ) ) ||
	    ( !applyq && lda<max( 1, min( nq, k ) ) ) )
    {
      *info = -8;
    } else if( ldc<max( 1, m ) ) {
      *info = -11;
    } else if( lwork<max( 1, nw ) ) {
      *info = -13;
    }
  if( *info!=0 ) {
    xerbla( "dormbr", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( m==0 || n==0 ) {
    work_1( 1 ) = 1;
    return;
  }

  if( applyq ) {
    /**
     *        Apply Q
     **/
    if( nq>=k ) {
      /**
       *           Q was determined by a call to DGEBRD with nq >= k
       **/
      dormqr( side, trans, m, n, k, a, lda, tau, c, ldc,
	     work, lwork, &iinfo );
    } else {
      /**
       *           Q was determined by a call to DGEBRD with nq < k
       **/
      if( left ) {
	mi = m - 1;
	ni = n;
	i1 = 2;
	i2 = 1;
      } else {
	mi = m;
	ni = n - 1;
	i1 = 1;
	i2 = 2;
      }
      dormqr( side, trans, mi, ni, nq-1, &a_2( 2, 1 ), lda, tau,
	     &c_2( i1, i2 ), ldc, work, lwork, &iinfo );
    }
  } else {
    /**
     *        Apply P
     **/
    if( notran ) {
      transt = 't';
    } else {
      transt = 'n';
    }
    if( nq>k ) {
      /**
       *           P was determined by a call to DGEBRD with nq > k
       **/
      dormlq( side, transt, m, n, k, a, lda, tau, c, ldc,
	     work, lwork, &iinfo );
    } else {
      /**
       *           P was determined by a call to DGEBRD with nq <= k
       **/
      if( left ) {
	mi = m - 1;
	ni = n;
	i1 = 2;
	i2 = 1;
      } else {
	mi = m;
	ni = n - 1;
	i1 = 1;
	i2 = 2;
      }
      dormlq( side, transt, mi, ni, nq-1, &a_2( 1, 2 ), lda,
	     tau, &c_2( i1, i2 ), ldc, work, lwork, &iinfo );
    }
  }
  return;
  /**
   *     End of DORMBR
   **/
}



void dgebrd( long m, long n, double a[], long lda,
	    double d[], double e[], double tauq[], double taup[],
	    double work[], long lwork,long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     March 31, 1993
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef tauq_1
#define tauq_1(a1) tauq[a1-1]
#undef taup_1
#define taup_1(a1) taup[a1-1]
#undef e_1
#define e_1(a1) e[a1-1]
#undef d_1
#define d_1(a1) d[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DGEBRD reduces a general real M-by-N matrix A to upper or lower
   *  bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
   *
   *  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows in the matrix A.  M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns in the matrix A.  N >= 0.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the M-by-N general matrix to be reduced.
   *          On exit,
   *          if m >= n, the diagonal and the first superdiagonal are
   *            overwritten with the upper bidiagonal matrix B; the
   *            elements below the diagonal, with the array TAUQ, represent
   *            the orthogonal matrix Q as a product of elementary
   *            reflectors, and the elements above the first superdiagonal,
   *            with the array TAUP, represent the orthogonal matrix P as
   *            a product of elementary reflectors;
   *          if m < n, the diagonal and the first subdiagonal are
   *            overwritten with the lower bidiagonal matrix B; the
   *            elements below the first subdiagonal, with the array TAUQ,
   *            represent the orthogonal matrix Q as a product of
   *            elementary reflectors, and the elements above the diagonal,
   *            with the array TAUP, represent the orthogonal matrix P as
   *            a product of elementary reflectors.
   *          See Further Details.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.  LDA >= max(1,M).
   *
   *  D       (output) DOUBLE PRECISION array, dimension (min(M,N))
   *          The diagonal elements of the bidiagonal matrix B:
   *          D(i) = A(i,i).
   *
   *  E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
   *          The off-diagonal elements of the bidiagonal matrix B:
   *          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
   *          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
   *
   *  TAUQ    (output) DOUBLE PRECISION array dimension (min(M,N))
   *          The scalar factors of the elementary reflectors which
   *          represent the orthogonal matrix Q. See Further Details.
   *
   *  TAUP    (output) DOUBLE PRECISION array, dimension (min(M,N))
   *          The scalar factors of the elementary reflectors which
   *          represent the orthogonal matrix P. See Further Details.
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
   *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
   *
   *  LWORK   (input) INTEGER
   *          The length of the array WORK.  LWORK >= max(1,M,N).
   *          For optimum performance LWORK >= (M+N)*NB, where NB
   *          is the optimal blocksize.
   *
   *  INFO    (output) INTEGER
   *          = 0:  successful exit
   *          < 0:  if INFO = -i, the i-th argument had an illegal value.
   *
   *  Further Details
   *  ===============
   *
   *  The matrices Q and P are represented as products of elementary
   *  reflectors:
   *
   *  If m >= n,
   *
   *     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
   *
   *  Each H(i) and G(i) has the form:
   *
   *     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
   *
   *  where tauq and taup are real scalars, and v and u are real vectors;
   *  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
   *  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
   *  tauq is stored in TAUQ(i) and taup in TAUP(i).
   *
   *  If m < n,
   *
   *     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
   *
   *  Each H(i) and G(i) has the form:
   *
   *     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
   *
   *  where tauq and taup are real scalars, and v and u are real vectors;
   *  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
   *  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
   *  tauq is stored in TAUQ(i) and taup in TAUP(i).
   *
   *  The contents of A on exit are illustrated by the following examples:
   *
   *  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
   *
   *    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
   *    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
   *    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
   *    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
   *    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
   *    (  v1  v2  v3  v4  v5 )
   *
   *  where d and e denote diagonal and off-diagonal elements of B, vi
   *  denotes an element of the vector defining H(i), and ui an element of
   *  the vector defining G(i).
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef one
#define one 1.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  long            i, iinfo, j, ldwrkx, ldwrky, minmn, nb, nbmin,
  nx;
  double    ws;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max, min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input parameters
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  if( m<0 ) {
    *info = -1;
  } else if( n<0 ) {
    *info = -2;
  } else if( lda<max( 1, m ) ) {
    *info = -4;
  } else if( lwork<max( 1, max(m, n) ) ) {
    *info = -10;
  }
  if( *info<0 ) {
    xerbla( "dgebrd", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  minmn = min( m, n );
  if( minmn==0 ) {
    work_1( 1 ) = 1;
    return;
  }

  ws = max( m, n );
  ldwrkx = m;
  ldwrky = n;
  /**
   *     Set the block size NB and the crossover point NX.
   **/
  nb = max( 1, ilaenv( 1, "dgebrd", " ", m, n, -1, -1 ) );

  if( nb>1 && nb<minmn ) {
    /**
     *        Determine when to switch from blocked to unblocked code.
     **/
    nx = max( nb, ilaenv( 3, "dgebrd", " ", m, n, -1, -1 ) );
    if( nx<minmn ) {
      ws = ( m+n )*nb;
      if( lwork<ws ) {
	/**
	 *              Not enough work space for the optimal NB, consider using
	 *              a smaller block size.
	 **/
	nbmin = ilaenv( 2, "dgebrd", " ", m, n, -1, -1 );
	if( lwork>=( m+n )*nbmin ) {
	  nb = lwork / ( m+n );
	} else {
	  nb = 1;
	  nx = minmn;
	}
      }
    }
  } else {
    nx = minmn;
  }

  for (i=1 ; nb>0?i<=minmn - nx:i>=minmn - nx ; i+=nb) {
    /**
     *        Reduce rows and columns i:i+nb-1 to bidiagonal form and return
     *        the matrices X and Y which are needed to update the unreduced
     *        part of the matrix
     **/
    dlabrd( m-i+1, n-i+1, nb, &a_2( i, i ), lda, &d_1( i ), &e_1( i ),
	   &tauq_1( i ), &taup_1( i ), work, ldwrkx,
	   &work_1( ldwrkx*nb+1 ), ldwrky );
    /**
     *        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
     *        of the form  A := A - V*Y' - X*U'
     **/
    dgemm( 'n'/*o transpose*/, 't'/*ranspose*/, m-i-nb+1, n-i-nb+1,
	  nb, -one, &a_2( i+nb, i ), lda,
	  &work_1( ldwrkx*nb+nb+1 ), ldwrky, one,
	  &a_2( i+nb, i+nb ), lda );
    dgemm( 'n'/*o transpose*/, 'n'/*o transpose*/, m-i-nb+1, n-i-nb+1,
	  nb, -one, &work_1( nb+1 ), ldwrkx, &a_2( i, i+nb ), lda,
	  one, &a_2( i+nb, i+nb ), lda );
    /**
     *        Copy diagonal and off-diagonal elements of B back into A
     **/
    if( m>=n ) {
      for (j=i ; j<=i + nb - 1 ; j+=1) {
	a_2( j, j ) = d_1( j );
	a_2( j, j+1 ) = e_1( j );
      }
    } else {
      for (j=i ; j<=i + nb - 1 ; j+=1) {
	a_2( j, j ) = d_1( j );
	a_2( j+1, j ) = e_1( j );
      }
    }
  }
  /**
   *     Use unblocked code to reduce the remainder of the matrix
   **/
  dgebd2( m-i+1, n-i+1, &a_2( i, i ), lda, &d_1( i ), &e_1( i ),
	 &tauq_1( i ), &taup_1( i ), work, &iinfo );
  work_1( 1 ) = ws;
  return;
  /**
   *     End of DGEBRD
   **/
}



void dgebd2( long m, long n, double a[], long lda,
	    double d[], double e[], double tauq[], double taup[],
	    double work[], long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     February 29, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef work_1
#define work_1(a1) work[a1-1]
#undef tauq_1
#define tauq_1(a1) tauq[a1-1]
#undef taup_1
#define taup_1(a1) taup[a1-1]
#undef e_1
#define e_1(a1) e[a1-1]
#undef d_1
#define d_1(a1) d[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DGEBD2 reduces a real general m by n matrix A to upper or lower
   *  bidiagonal form B by an orthogonal transformation: Q' * A * P = B.
   *
   *  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows in the matrix A.  M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns in the matrix A.  N >= 0.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the m by n general matrix to be reduced.
   *          On exit,
   *          if m >= n, the diagonal and the first superdiagonal are
   *            overwritten with the upper bidiagonal matrix B; the
   *            elements below the diagonal, with the array TAUQ, represent
   *            the orthogonal matrix Q as a product of elementary
   *            reflectors, and the elements above the first superdiagonal,
   *            with the array TAUP, represent the orthogonal matrix P as
   *            a product of elementary reflectors;
   *          if m < n, the diagonal and the first subdiagonal are
   *            overwritten with the lower bidiagonal matrix B; the
   *            elements below the first subdiagonal, with the array TAUQ,
   *            represent the orthogonal matrix Q as a product of
   *            elementary reflectors, and the elements above the diagonal,
   *            with the array TAUP, represent the orthogonal matrix P as
   *            a product of elementary reflectors.
   *          See Further Details.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.  LDA >= max(1,M).
   *
   *  D       (output) DOUBLE PRECISION array, dimension (min(M,N))
   *          The diagonal elements of the bidiagonal matrix B:
   *          D(i) = A(i,i).
   *
   *  E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
   *          The off-diagonal elements of the bidiagonal matrix B:
   *          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
   *          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
   *
   *  TAUQ    (output) DOUBLE PRECISION array dimension (min(M,N))
   *          The scalar factors of the elementary reflectors which
   *          represent the orthogonal matrix Q. See Further Details.
   *
   *  TAUP    (output) DOUBLE PRECISION array, dimension (min(M,N))
   *          The scalar factors of the elementary reflectors which
   *          represent the orthogonal matrix P. See Further Details.
   *
   *  WORK    (workspace) DOUBLE PRECISION array, dimension (max(M,N))
   *
   *  INFO    (output) INTEGER
   *          = 0: successful exit.
   *          < 0: if INFO = -i, the i-th argument had an illegal value.
   *
   *  Further Details
   *  ===============
   *
   *  The matrices Q and P are represented as products of elementary
   *  reflectors:
   *
   *  If m >= n,
   *
   *     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
   *
   *  Each H(i) and G(i) has the form:
   *
   *     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
   *
   *  where tauq and taup are real scalars, and v and u are real vectors;
   *  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
   *  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
   *  tauq is stored in TAUQ(i) and taup in TAUP(i).
   *
   *  If m < n,
   *
   *     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
   *
   *  Each H(i) and G(i) has the form:
   *
   *     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
   *
   *  where tauq and taup are real scalars, and v and u are real vectors;
   *  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
   *  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
   *  tauq is stored in TAUQ(i) and taup in TAUP(i).
   *
   *  The contents of A on exit are illustrated by the following examples:
   *
   *  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
   *
   *    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
   *    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
   *    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
   *    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
   *    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
   *    (  v1  v2  v3  v4  v5 )
   *
   *  where d and e denote diagonal and off-diagonal elements of B, vi
   *  denotes an element of the vector defining H(i), and ui an element of
   *  the vector defining G(i).
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e+0
#undef one
#define one 1.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  long            i;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max, min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input parameters
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  if( m<0 ) {
    *info = -1;
  } else if( n<0 ) {
    *info = -2;
  } else if( lda<max( 1, m ) ) {
    *info = -4;
  }
  if( *info<0 ) {
    xerbla( "dgebd2", -*info );
    return;
  }

  if( m>=n ) {
    /**
     *        Reduce to upper bidiagonal form
     **/
    for (i=1 ; i<=n ; i+=1) {
      /**
       *           Generate elementary reflector H(i) to annihilate A(i+1:m,i)
       **/
      dlarfg( m-i+1, &a_2( i, i ), &a_2( min( i+1, m ), i ), 1,
	     &tauq_1( i ) );
      d_1( i ) = a_2( i, i );
      a_2( i, i ) = one;
      /**
       *           Apply H(i) to A(i:m,i+1:n) from the left
       **/
      dlarf( 'l'/*eft*/, m-i+1, n-i, &a_2( i, i ), 1, tauq_1( i ),
	    &a_2( i, i+1 ), lda, work );
      a_2( i, i ) = d_1( i );

      if( i<n ) {
	/**
	 *              Generate elementary reflector G(i) to annihilate
	 *              A(i,i+2:n)
	 **/
	dlarfg( n-i, &a_2( i, i+1 ), &a_2( i, min( i+2, n ) ),
	       lda, &taup_1( i ) );
	e_1( i ) = a_2( i, i+1 );
	a_2( i, i+1 ) = one;
	/**
	 *              Apply G(i) to A(i+1:m,i+1:n) from the right
	 **/
	dlarf( 'r'/*ight*/, m-i, n-i, &a_2( i, i+1 ), lda,
	      taup_1( i ), &a_2( i+1, i+1 ), lda, work );
	a_2( i, i+1 ) = e_1( i );
      } else {
	taup_1( i ) = zero;
      }
    }
  } else {
    /**
     *        Reduce to lower bidiagonal form
     **/
    for (i=1 ; i<=m ; i+=1) {
      /**
       *           Generate elementary reflector G(i) to annihilate A(i,i+1:n)
       **/
      dlarfg( n-i+1, &a_2( i, i ), &a_2( i, min( i+1, n ) ), lda,
	     &taup_1( i ) );
      d_1( i ) = a_2( i, i );
      a_2( i, i ) = one;
      /**
       *           Apply G(i) to A(i+1:m,i:n) from the right
       **/
      dlarf( 'r'/*ight*/, m-i, n-i+1, &a_2( i, i ), lda, taup_1( i ),
	    &a_2( min( i+1, m ), i ), lda, work );
      a_2( i, i ) = d_1( i );

      if( i<m ) {
	/**
	 *              Generate elementary reflector H(i) to annihilate
	 *              A(i+2:m,i)
	 **/
	dlarfg( m-i, &a_2( i+1, i ), &a_2( min( i+2, m ), i ), 1,
	       &tauq_1( i ) );
	e_1( i ) = a_2( i+1, i );
	a_2( i+1, i ) = one;
	/**
	 *              Apply H(i) to A(i+1:m,i+1:n) from the left
	 **/
	dlarf( 'l'/*eft*/, m-i, n-i, &a_2( i+1, i ), 1, tauq_1( i ),
	      &a_2( i+1, i+1 ), lda, work );
	a_2( i+1, i ) = e_1( i );
      } else {
	tauq_1( i ) = zero;
      }
    }
  }
  return;
  /**
   *     End of DGEBD2
   **/
}



void dlabrd( long m, long n, long nb, double a[], long lda,
	    double d[], double e[], double tauq[], double taup[],
	    double x[], long ldx, double y[],long ldy )
{
  /**
   *  -- LAPACK auxiliary routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     February 29, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef y_2
#define y_2(a1,a2) y[a1-1+ldy*(a2-1)]
#undef x_2
#define x_2(a1,a2) x[a1-1+ldx*(a2-1)]
#undef tauq_1
#define tauq_1(a1) tauq[a1-1]
#undef taup_1
#define taup_1(a1) taup[a1-1]
#undef e_1
#define e_1(a1) e[a1-1]
#undef d_1
#define d_1(a1) d[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DLABRD reduces the first NB rows and columns of a real general
   *  m by n matrix A to upper or lower bidiagonal form by an orthogonal
   *  transformation Q' * A * P, and returns the matrices X and Y which
   *  are needed to apply the transformation to the unreduced part of A.
   *
   *  If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
   *  bidiagonal form.
   *
   *  This is an auxiliary routine called by DGEBRD
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows in the matrix A.
   *
   *  N       (input) INTEGER
   *          The number of columns in the matrix A.
   *
   *  NB      (input) INTEGER
   *          The number of leading rows and columns of A to be reduced.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the m by n general matrix to be reduced.
   *          On exit, the first NB rows and columns of the matrix are
   *          overwritten; the rest of the array is unchanged.
   *          If m >= n, elements on and below the diagonal in the first NB
   *            columns, with the array TAUQ, represent the orthogonal
   *            matrix Q as a product of elementary reflectors; and
   *            elements above the diagonal in the first NB rows, with the
   *            array TAUP, represent the orthogonal matrix P as a product
   *            of elementary reflectors.
   *          If m < n, elements below the diagonal in the first NB
   *            columns, with the array TAUQ, represent the orthogonal
   *            matrix Q as a product of elementary reflectors, and
   *            elements on and above the diagonal in the first NB rows,
   *            with the array TAUP, represent the orthogonal matrix P as
   *            a product of elementary reflectors.
   *          See Further Details.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.  LDA >= max(1,M).
   *
   *  D       (output) DOUBLE PRECISION array, dimension (NB)
   *          The diagonal elements of the first NB rows and columns of
   *          the reduced matrix.  D(i) = A(i,i).
   *
   *  E       (output) DOUBLE PRECISION array, dimension (NB)
   *          The off-diagonal elements of the first NB rows and columns of
   *          the reduced matrix.
   *
   *  TAUQ    (output) DOUBLE PRECISION array dimension (NB)
   *          The scalar factors of the elementary reflectors which
   *          represent the orthogonal matrix Q. See Further Details.
   *
   *  TAUP    (output) DOUBLE PRECISION array, dimension (NB)
   *          The scalar factors of the elementary reflectors which
   *          represent the orthogonal matrix P. See Further Details.
   *
   *  X       (output) DOUBLE PRECISION array, dimension (LDX,NB)
   *          The m-by-nb matrix X required to update the unreduced part
   *          of A.
   *
   *  LDX     (input) INTEGER
   *          The leading dimension of the array X. LDX >= M.
   *
   *  Y       (output) DOUBLE PRECISION array, dimension (LDY,NB)
   *          The n-by-nb matrix Y required to update the unreduced part
   *          of A.
   *
   *  LDY     (output) INTEGER
   *          The leading dimension of the array Y. LDY >= N.
   *
   *  Further Details
   *  ===============
   *
   *  The matrices Q and P are represented as products of elementary
   *  reflectors:
   *
   *     Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
   *
   *  Each H(i) and G(i) has the form:
   *
   *     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
   *
   *  where tauq and taup are real scalars, and v and u are real vectors.
   *
   *  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
   *  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
   *  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
   *
   *  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
   *  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
   *  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
   *
   *  The elements of the vectors v and u together form the m-by-nb matrix
   *  V and the nb-by-n matrix U' which are needed, with X and Y, to apply
   *  the transformation to the unreduced part of the matrix, using a block
   *  update of the form:  A := A - V*Y' - X*U'.
   *
   *  The contents of A on exit are illustrated by the following examples
   *  with nb = 2:
   *
   *  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
   *
   *    (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
   *    (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
   *    (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
   *    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
   *    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
   *    (  v1  v2  a   a   a  )
   *
   *  where a denotes an element of the original matrix which is unchanged,
   *  vi denotes an element of the vector defining H(i), and ui an element
   *  of the vector defining G(i).
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef zero
#define zero 0.0e0
#undef one
#define one 1.0e0
  /**     ..
   *     .. Local Scalars ..*/
  long            i;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Quick return if possible
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  if( m<=0 || n<=0 )
    return;

  if( m>=n ) {
    /**
     *        Reduce to upper bidiagonal form
     **/
    for (i=1 ; i<=nb ; i+=1) {
      /**
       *           Update A(i:m,i)
       **/
      dgemv( 'n'/*o transpose*/, m-i+1, i-1, -one, &a_2( i, 1 ),
	    lda, &y_2( i, 1 ), ldy, one, &a_2( i, i ), 1 );
      dgemv( 'n'/*o transpose*/, m-i+1, i-1, -one, &x_2( i, 1 ),
	    ldx, &a_2( 1, i ), 1, one, &a_2( i, i ), 1 );
      /**
       *           Generate reflection Q(i) to annihilate A(i+1:m,i)
       **/
      dlarfg( m-i+1, &a_2( i, i ), &a_2( min( i+1, m ), i ), 1,
	     &tauq_1( i ) );
      d_1( i ) = a_2( i, i );
      if( i<n ) {
	a_2( i, i ) = one;
	/**
	 *              Compute Y(i+1:n,i)
	 **/
	dgemv( 't'/*ranspose*/, m-i+1, n-i, one, &a_2( i, i+1 ),
	      lda, &a_2( i, i ), 1, zero, &y_2( i+1, i ), 1 );
	dgemv( 't'/*ranspose*/, m-i+1, i-1, one, &a_2( i, 1 ), lda,
	      &a_2( i, i ), 1, zero, &y_2( 1, i ), 1 );
	dgemv( 'n'/*o transpose*/, n-i, i-1, -one, &y_2( i+1, 1 ),
	      ldy, &y_2( 1, i ), 1, one, &y_2( i+1, i ), 1 );
	dgemv( 't'/*ranspose*/, m-i+1, i-1, one, &x_2( i, 1 ), ldx,
	      &a_2( i, i ), 1, zero, &y_2( 1, i ), 1 );
	dgemv( 't'/*ranspose*/, i-1, n-i, -one, &a_2( 1, i+1 ),
	      lda, &y_2( 1, i ), 1, one, &y_2( i+1, i ), 1 );
	dscal( n-i, tauq_1( i ), &y_2( i+1, i ), 1 );
	/**
	 *              Update A(i,i+1:n)
	 **/
	dgemv( 'n'/*o transpose*/, n-i, i, -one, &y_2( i+1, 1 ),
	      ldy, &a_2( i, 1 ), lda, one, &a_2( i, i+1 ), lda );
	dgemv( 't'/*ranspose*/, i-1, n-i, -one, &a_2( 1, i+1 ),
	      lda, &x_2( i, 1 ), ldx, one, &a_2( i, i+1 ), lda );
	/**
	 *              Generate reflection P(i) to annihilate A(i,i+2:n)
	 **/
	dlarfg( n-i, &a_2( i, i+1 ), &a_2( i, min( i+2, n ) ),
	       lda, &taup_1( i ) );
	e_1( i ) = a_2( i, i+1 );
	a_2( i, i+1 ) = one;
	/**
	 *              Compute X(i+1:m,i)
	 **/
	dgemv( 'n'/*o transpose*/, m-i, n-i, one, &a_2( i+1, i+1 ),
	      lda, &a_2( i, i+1 ), lda, zero, &x_2( i+1, i ), 1 );
	dgemv( 't'/*ranspose*/, n-i, i, one, &y_2( i+1, 1 ), ldy,
	      &a_2( i, i+1 ), lda, zero, &x_2( 1, i ), 1 );
	dgemv( 'n'/*o transpose*/, m-i, i, -one, &a_2( i+1, 1 ),
	      lda, &x_2( 1, i ), 1, one, &x_2( i+1, i ), 1 );
	dgemv( 'n'/*o transpose*/, i-1, n-i, one, &a_2( 1, i+1 ),
	      lda, &a_2( i, i+1 ), lda, zero, &x_2( 1, i ), 1 );
	dgemv( 'n'/*o transpose*/, m-i, i-1, -one, &x_2( i+1, 1 ),
	      ldx, &x_2( 1, i ), 1, one, &x_2( i+1, i ), 1 );
	dscal( m-i, taup_1( i ), &x_2( i+1, i ), 1 );
      }
    }
  } else {
    /**
     *        Reduce to lower bidiagonal form
     **/
    for (i=1 ; i<=nb ; i+=1) {
      /**
       *           Update A(i,i:n)
       **/
      dgemv( 'n'/*o transpose*/, n-i+1, i-1, -one, &y_2( i, 1 ),
	    ldy, &a_2( i, 1 ), lda, one, &a_2( i, i ), lda );
      dgemv( 't'/*ranspose*/, i-1, n-i+1, -one, &a_2( 1, i ), lda,
	    &x_2( i, 1 ), ldx, one, &a_2( i, i ), lda );
      /**
       *           Generate reflection P(i) to annihilate A(i,i+1:n)
       **/
      dlarfg( n-i+1, &a_2( i, i ), &a_2( i, min( i+1, n ) ), lda,
	     &taup_1( i ) );
      d_1( i ) = a_2( i, i );
      if( i<m ) {
	a_2( i, i ) = one;
	/**
	 *              Compute X(i+1:m,i)
	 **/
	dgemv( 'n'/*o transpose*/, m-i, n-i+1, one, &a_2( i+1, i ),
	      lda, &a_2( i, i ), lda, zero, &x_2( i+1, i ), 1 );
	dgemv( 't'/*ranspose*/, n-i+1, i-1, one, &y_2( i, 1 ), ldy,
	      &a_2( i, i ), lda, zero, &x_2( 1, i ), 1 );
	dgemv( 'n'/*o transpose*/, m-i, i-1, -one, &a_2( i+1, 1 ),
	      lda, &x_2( 1, i ), 1, one, &x_2( i+1, i ), 1 );
	dgemv( 'n'/*o transpose*/, i-1, n-i+1, one, &a_2( 1, i ),
	      lda, &a_2( i, i ), lda, zero, &x_2( 1, i ), 1 );
	dgemv( 'n'/*o transpose*/, m-i, i-1, -one, &x_2( i+1, 1 ),
	      ldx, &x_2( 1, i ), 1, one, &x_2( i+1, i ), 1 );
	dscal( m-i, taup_1( i ), &x_2( i+1, i ), 1 );
	/**
	 *              Update A(i+1:m,i)
	 **/
	dgemv( 'n'/*o transpose*/, m-i, i-1, -one, &a_2( i+1, 1 ),
	      lda, &y_2( i, 1 ), ldy, one, &a_2( i+1, i ), 1 );
	dgemv( 'n'/*o transpose*/, m-i, i, -one, &x_2( i+1, 1 ),
	      ldx, &a_2( 1, i ), 1, one, &a_2( i+1, i ), 1 );
	/**
	 *              Generate reflection Q(i) to annihilate A(i+2:m,i)
	 **/
	dlarfg( m-i, &a_2( i+1, i ), &a_2( min( i+2, m ), i ), 1,
	       &tauq_1( i ) );
	e_1( i ) = a_2( i+1, i );
	a_2( i+1, i ) = one;
	/**
	 *              Compute Y(i+1:n,i)
	 **/
	dgemv( 't'/*ranspose*/, m-i, n-i, one, &a_2( i+1, i+1 ),
	      lda, &a_2( i+1, i ), 1, zero, &y_2( i+1, i ), 1 );
	dgemv( 't'/*ranspose*/, m-i, i-1, one, &a_2( i+1, 1 ), lda,
	      &a_2( i+1, i ), 1, zero, &y_2( 1, i ), 1 );
	dgemv( 'n'/*o transpose*/, n-i, i-1, -one, &y_2( i+1, 1 ),
	      ldy, &y_2( 1, i ), 1, one, &y_2( i+1, i ), 1 );
	dgemv( 't'/*ranspose*/, m-i, i, one, &x_2( i+1, 1 ), ldx,
	      &a_2( i+1, i ), 1, zero, &y_2( 1, i ), 1 );
	dgemv( 't'/*ranspose*/, i, n-i, -one, &a_2( 1, i+1 ), lda,
	      &y_2( 1, i ), 1, one, &y_2( i+1, i ), 1 );
	dscal( n-i, tauq_1( i ), &y_2( i+1, i ), 1 );
      }
    }
  }
  return;
  /**
   *     End of DLABRD
   **/
}
